From ad7d3bc84c3bccf2f8f714941ca7375179adfe8f Mon Sep 17 00:00:00 2001
From: Stephen Schoettler <stephenschoettler@gmail.com>
Date: Thu, 14 May 2026 19:08:38 -0700
Subject: [PATCH 001/218] test(e2e): fix Discord mock exception surface

---
 tests/e2e/conftest.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 332cccee497..acb999e9e34 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -66,6 +66,9 @@ def _ensure_discord_mock():
     discord_mod.DMChannel = type("DMChannel", (), {})
     discord_mod.Thread = type("Thread", (), {})
     discord_mod.ForumChannel = type("ForumChannel", (), {})
+    discord_mod.Forbidden = type("Forbidden", (Exception,), {})
+    discord_mod.MessageType = SimpleNamespace(default=0, reply=19)
+    discord_mod.Object = lambda *, id: SimpleNamespace(id=id)
     discord_mod.Interaction = object
     discord_mod.app_commands = SimpleNamespace(
         describe=lambda **kwargs: (lambda fn: fn),

From e8a4c85e889b8990ef4cb5d70276b286d82afac7 Mon Sep 17 00:00:00 2001
From: Stephen Schoettler <stephenschoettler@gmail.com>
Date: Thu, 14 May 2026 19:24:12 -0700
Subject: [PATCH 002/218] test(run-agent): isolate Nous provider parity model

---
 tests/run_agent/test_provider_parity.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
index d3a5a1b37fa..c65c22004a9 100644
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@@ -61,6 +61,8 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht
     )
     if model:
         kwargs["model"] = model
+    elif provider == "nous":
+        kwargs["model"] = "gpt-5"
     base_url="https://openrouter.ai/api/v1",
     api_key="test-key",
     base_url="https://openrouter.ai/api/v1",

From f9ad7400e30517159712a77e6a4bc2f3a390b2db Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Thu, 14 May 2026 23:43:13 -0700
Subject: [PATCH 003/218] =?UTF-8?q?fix(goals):=20raise=20judge=20max=5Ftok?=
 =?UTF-8?q?ens=20200=20=E2=86=92=204096,=20make=20configurable?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The freeform /goal judge was capped at max_tokens=200, which reliably
truncated the JSON verdict on reasoning-heavy models (deepseek-v4-pro,
qwq, etc.) — the model burns tokens on hidden reasoning before emitting
visible content, and the first /goal turn's prompt is larger than later
turns, blowing past 200. Symptom: agent.log shows
`judge reply was not JSON: '{"done": true, "reason": "The agent successfully'`
followed by repeated `judge returned empty response` lines, then the
goal pauses with a misleading 'judge model isn't returning the required
JSON verdict' message.

Diagnosed live by @helix4u — empirically verified that raising the
budget on an unmodified worktree makes the failures go away on the
exact configs users were hitting on Nous Plus subscription paths.

Changes:
- DEFAULT_JUDGE_MAX_TOKENS = 4096 (up from 200)
- New auxiliary.goal_judge.max_tokens config knob for tuning in
  specifically constrained setups
- _goal_judge_max_tokens() resolves the value with fail-open semantics
  (non-int / non-positive / load failure → default). load_config() is
  mtime-cached so per-turn lookup is cheap.

Scoped narrowly to the verified root cause — does not introduce a
submit_verdict tool-call schema (see #26162 / #23671 for that direction;
they can land separately if we want them).

Tests: tests/hermes_cli/test_goals.py + tests/cli/test_cli_goal_interrupt.py
+ tests/gateway/test_goal_verdict_send.py — 62/62 passing.

E2E verified: config override honored (8192), missing/garbage/zero
values fall back to 4096, no-auxiliary-section falls back to 4096.

Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>

Credits:
- @helix4u (Gille) — diagnosed the max_tokens=200 truncation via live
  testing on an unmodified worktree, drafted the original fix shape
  in #26162.
- @AhmetArif0 — flagged the freeform judge fragility in #23671 from
  the tool-call angle.
- @0xharryriddle (HarryRiddle.eth) — reported the issue from a Nous
  Plus subscription setup in #23876 with full debug reports.

Closes #23876
Supersedes #26162, #23671, #23881
---
 hermes_cli/goals.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py
index 1542b9a7a38..62ee00547c1 100644
--- a/hermes_cli/goals.py
+++ b/hermes_cli/goals.py
@@ -45,6 +45,16 @@ logger = logging.getLogger(__name__)
 
 DEFAULT_MAX_TURNS = 20
 DEFAULT_JUDGE_TIMEOUT = 30.0
+# Judge output budget. The freeform judge returns a one-line JSON verdict, but
+# reasoning models (deepseek-v4, qwq, etc.) burn tokens on hidden reasoning
+# before emitting the visible JSON — and the first /goal turn's prompt is
+# larger than later turns, which pushes total reply length past tight caps.
+# 200 tokens (the original default) reliably truncated the JSON on reasoning
+# models, leaving '{"done": true, "reason": "The agent successfully' and
+# triggering the auto-pause. 4096 covers reasoning + verdict on every model
+# we've live-tested; override via auxiliary.goal_judge.max_tokens for
+# specifically constrained setups.
+DEFAULT_JUDGE_MAX_TOKENS = 4096
 # Cap how much of the last response + recent messages we send to the judge.
 _JUDGE_RESPONSE_SNIPPET_CHARS = 4000
 # After this many consecutive judge *parse* failures (empty output / non-JSON),
@@ -282,6 +292,30 @@ def _truncate(text: str, limit: int) -> str:
 _JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL)
 
 
+def _goal_judge_max_tokens() -> int:
+    """Resolve auxiliary.goal_judge.max_tokens, falling back to the default.
+
+    ``load_config()`` is cached on the config file's (mtime, size), so calling
+    this once per judge turn is cheap. A non-positive or non-int value falls
+    back to the default rather than crashing the goal loop.
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        value = (
+            (cfg.get("auxiliary") or {})
+            .get("goal_judge", {})
+            .get("max_tokens", DEFAULT_JUDGE_MAX_TOKENS)
+        )
+        value = int(value)
+        if value > 0:
+            return value
+    except Exception:
+        pass
+    return DEFAULT_JUDGE_MAX_TOKENS
+
+
 def _parse_judge_response(raw: str) -> Tuple[bool, str, bool]:
     """Parse the judge's reply. Fail-open to ``(False, "<reason>", parse_failed)``.
 
@@ -404,7 +438,7 @@ def judge_goal(
                 {"role": "user", "content": prompt},
             ],
             temperature=0,
-            max_tokens=200,
+            max_tokens=_goal_judge_max_tokens(),
             timeout=timeout,
             extra_body=get_auxiliary_extra_body() or None,
         )

From 6bdad1f3b2e31d38673146da362ca5dd4ddbb456 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Fri, 15 May 2026 13:21:48 +0530
Subject: [PATCH 004/218] ci: add PyPI publish workflow (salvaged from #25901)
 (#26148)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ci(pypi): add publish workflow for automated PyPI releases

Triggered by CalVer tag pushes from scripts/release.py (v20* pattern).
Three jobs: build (uv build) → publish (OIDC trusted publishing) → sign
(Sigstore + attach to existing GitHub Release).

- workflow_dispatch as manual escape hatch
- skip-existing for safe re-runs
- Graceful skip when GitHub Release not found (sign job)
- Top-level permissions: contents: read (CodeQL compliant)

Requires one-time setup: PyPI trusted publisher + GitHub pypi environment.

Co-authored-by: dmahan93 <44207705+dmahan93@users.noreply.github.com>

* fix(release): address review findings

- Stage acp_registry/agent.json in version bump commit (was silently left unstaged)
- Add missing return when no previous tags found without --first-release
- Fix get_pr_number return type annotation (str -> str | None)
- Prefer uv build over python -m build (matches CI workflow), with fallback
- Use unit separator (%x1f) in git log format to handle | in author names
- Add explicit encoding='utf-8' to .release_notes.md write

Workflow hardening:
- Gracefully skip signing when GitHub Release not found (env var gate
  instead of exit 1, so PyPI publish still shows green)

* fix(ci): harden PyPI workflow — SHA-pin actions, guard workflow_dispatch, explicit build flags

- Pin all actions to commit SHAs (supply-chain hardening for id-token:write)
- workflow_dispatch now requires confirm_tag input + checks out that tag
- Both uv build paths explicitly pass --sdist --wheel

---------

Co-authored-by: dmahan93 <44207705+dmahan93@users.noreply.github.com>
---
 .github/workflows/upload_to_pypi.yml | 137 +++++++++++++++++++++++++++
 scripts/release.py                   |  36 ++++---
 2 files changed, 160 insertions(+), 13 deletions(-)
 create mode 100644 .github/workflows/upload_to_pypi.yml

diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml
new file mode 100644
index 00000000000..4e2fe4748d3
--- /dev/null
+++ b/.github/workflows/upload_to_pypi.yml
@@ -0,0 +1,137 @@
+name: Publish to PyPI
+
+# Triggered by CalVer tag pushes from scripts/release.py (e.g. v2026.5.15)
+# Can also be triggered manually from the Actions tab as an escape hatch.
+on:
+  push:
+    tags:
+      - 'v20*'  # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
+  workflow_dispatch:
+    inputs:
+      confirm_tag:
+        description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
+        required: true
+        type: string
+
+# Restrict default token to read-only; each job escalates as needed.
+permissions:
+  contents: read
+
+# Prevent overlapping publishes (e.g. two same-day tags pushed quickly).
+concurrency:
+  group: pypi-publish
+  cancel-in-progress: false
+
+jobs:
+  build:
+    name: Build distribution 📦
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          persist-credentials: false
+          # On workflow_dispatch, check out the confirmed tag.
+          ref: ${{ inputs.confirm_tag || github.ref }}
+          fetch-tags: true
+
+      - name: Validate tag exists
+        if: github.event_name == 'workflow_dispatch'
+        run: |
+          if ! git tag -l "${{ inputs.confirm_tag }}" | grep -q .; then
+            echo "::error::Tag '${{ inputs.confirm_tag }}' does not exist in the repo"
+            exit 1
+          fi
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: '3.13'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e  # v6
+
+      - name: Build wheel and sdist
+        run: uv build --sdist --wheel
+
+      - name: Upload distribution artifacts
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+  publish:
+    name: Publish to PyPI
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/hermes-agent
+    permissions:
+      id-token: write  # OIDC trusted publishing
+
+    steps:
+      - name: Download distribution artifacts
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # v1.14.0
+        with:
+          skip-existing: true
+
+  sign:
+    name: Sign and attach to GitHub Release
+    # Only runs on tag pushes — release.py creates the GitHub Release,
+    # and workflow_dispatch won't have a matching release to attach to.
+    if: startsWith(github.ref, 'refs/tags/')
+    needs: publish
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write   # attach assets to the existing release
+      id-token: write   # sigstore signing
+
+    steps:
+      - name: Download distribution artifacts
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Wait for GitHub Release to exist
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        # release.py creates the GitHub Release after pushing the tag,
+        # but this workflow starts from the tag push — wait for it.
+        run: |
+          for i in $(seq 1 30); do
+            if gh release view "$GITHUB_REF_NAME" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1; then
+              echo "Release $GITHUB_REF_NAME found"
+              exit 0
+            fi
+            echo "Waiting for release... ($i/30)"
+            sleep 10
+          done
+          echo "::warning::Release $GITHUB_REF_NAME not found after 5 minutes — skipping signature upload"
+          echo "skip_sign=true" >> "$GITHUB_ENV"
+
+      - name: Sign with Sigstore
+        if: env.skip_sign != 'true'
+        uses: sigstore/gh-action-sigstore-python@f514d46b907ebcd5bedc05145c03b69c1edd8b46  # v3.0.0
+        with:
+          inputs: >-
+            ./dist/*.tar.gz
+            ./dist/*.whl
+
+      - name: Attach signed artifacts to GitHub Release
+        if: env.skip_sign != 'true'
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        # release.py already created the GitHub Release — just upload
+        # the Sigstore signatures alongside the existing assets.
+        run: >-
+          gh release upload
+          "$GITHUB_REF_NAME" dist/*.sigstore.json
+          --repo "$GITHUB_REPOSITORY"
+          --clobber
diff --git a/scripts/release.py b/scripts/release.py
index d3118bc128e..53db4bbec2c 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1188,15 +1188,21 @@ def _update_acp_registry_versions(semver: str) -> None:
 def build_release_artifacts(semver: str) -> list[Path]:
     """Build sdist/wheel artifacts for the current release.
 
-    Returns the artifact paths when the local environment has ``python -m build``
-    available. If build tooling is missing or the build fails, returns an empty
-    list and lets the release proceed without attached Python artifacts.
+    Tries ``uv build`` first (matching the CI workflow), falls back to
+    ``python -m build`` if uv is unavailable.
     """
     dist_dir = REPO_ROOT / "dist"
     shutil.rmtree(dist_dir, ignore_errors=True)
 
+    # Prefer uv build (matches CI workflow), fall back to python -m build.
+    uv_bin = shutil.which("uv")
+    if uv_bin:
+        cmd = [uv_bin, "build", "--sdist", "--wheel"]
+    else:
+        cmd = [sys.executable, "-m", "build", "--sdist", "--wheel"]
+
     result = subprocess.run(
-        [sys.executable, "-m", "build", "--sdist", "--wheel"],
+        cmd,
         cwd=str(REPO_ROOT),
         capture_output=True,
         text=True,
@@ -1209,7 +1215,7 @@ def build_release_artifacts(semver: str) -> list[Path]:
             print(f"    {stderr.splitlines()[-1]}")
         elif stdout:
             print(f"    {stdout.splitlines()[-1]}")
-        print("    Install the 'build' package to attach semver-named sdist/wheel assets.")
+        print("    Install uv or the 'build' package to attach sdist/wheel assets.")
         return []
 
     artifacts = sorted(p for p in dist_dir.iterdir() if p.is_file())
@@ -1316,11 +1322,11 @@ def get_commits(since_tag=None):
     else:
         range_spec = "HEAD"
 
-    # Format: hash|author_name|author_email|subject\0body
-    # Using %x00 (null) as separator between subject and body
+    # Format: hash<US>author_name<US>author_email<US>subject\0body
+    # Using %x1f (unit separator) to avoid conflict with | in author names
     log = git(
         "log", range_spec,
-        "--format=%H|%an|%ae|%s%x00%b%x00",
+        "--format=%H%x1f%an%x1f%ae%x1f%s%x00%b%x00",
         "--no-merges",
     )
 
@@ -1334,14 +1340,14 @@ def get_commits(since_tag=None):
         entry = entry.strip()
         if not entry:
             continue
-        # Split on first null to separate "hash|name|email|subject" from "body"
+        # Split on first null to separate "hash<US>name<US>email<US>subject" from "body"
         if "\0" in entry:
             header, body = entry.split("\0", 1)
             body = body.strip()
         else:
             header = entry
             body = ""
-        parts = header.split("|", 3)
+        parts = header.split("\x1f", 3)
         if len(parts) != 4:
             continue
         sha, name, email, subject = parts
@@ -1361,7 +1367,7 @@ def get_commits(since_tag=None):
     return commits
 
 
-def get_pr_number(subject: str) -> str:
+def get_pr_number(subject: str) -> str | None:
     """Extract PR number from commit subject if present."""
     match = re.search(r"#(\d+)", subject)
     if match:
@@ -1512,6 +1518,7 @@ def main():
         print("No previous tags found. Use --first-release for the initial release.")
         print(f"Would create tag: {tag_name}")
         print(f"Would set version: {new_version}")
+        return
 
     # Get commits
     commits = get_commits(since_tag=prev_tag)
@@ -1556,7 +1563,10 @@ def main():
             print(f"  ✓ Updated version files to v{new_version} ({calver_date})")
 
             # Commit version bump
-            add_result = git_result("add", str(VERSION_FILE), str(PYPROJECT_FILE))
+            add_files = [str(VERSION_FILE), str(PYPROJECT_FILE)]
+            if ACP_REGISTRY_MANIFEST.exists():
+                add_files.append(str(ACP_REGISTRY_MANIFEST))
+            add_result = git_result("add", *add_files)
             if add_result.returncode != 0:
                 print(f"  ✗ Failed to stage version files: {add_result.stderr.strip()}")
                 return
@@ -1598,7 +1608,7 @@ def main():
 
         # Create GitHub release
         changelog_file = REPO_ROOT / ".release_notes.md"
-        changelog_file.write_text(changelog)
+        changelog_file.write_text(changelog, encoding="utf-8")
 
         gh_cmd = [
             "gh", "release", "create", tag_name,

From d57a4b3eb51e5c445923d33a5c3da9266e62790b Mon Sep 17 00:00:00 2001
From: libo1106 <libo1106@foxmail.com>
Date: Sun, 10 May 2026 00:17:13 +0800
Subject: [PATCH 005/218] feat(yuanbao): add _parse_resource_id and update
 _extract_text for ybres anchors

---
 gateway/platforms/yuanbao.py | 48 +++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index d79da7856ae..68184b6cd29 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -1645,6 +1645,25 @@ class ExtractContentMiddleware(InboundMiddleware):
             return None
         return f"[link: {link} | visit link for full content]"
 
+    @staticmethod
+    def _parse_resource_id(url: str) -> str:
+        """Extract resourceId from Yuanbao resource URL query parameters.
+
+        Args:
+            url: Resource URL (e.g., https://...?resourceId=abc123)
+
+        Returns:
+            Resource ID string, or empty string if not found
+        """
+        if not url:
+            return ""
+        try:
+            query = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
+            ids = query.get("resourceId") or query.get("resourceid") or []
+            return str(ids[0]).strip() if ids else ""
+        except Exception:
+            return ""
+
     @classmethod
     def _extract_text(cls, msg_body: list) -> str:
         """Extract plain text content from MsgBody.
@@ -1668,14 +1687,35 @@ class ExtractContentMiddleware(InboundMiddleware):
                 if text:
                     parts.append(text)
             elif elem_type == "TIMImageElem":
-                parts.append("[image]")
+                # Extract resourceId from image_info_array URL
+                image_info_array = content.get("image_info_array")
+                if not isinstance(image_info_array, list):
+                    image_info_array = []
+                image_info = None
+                # Prefer medium image (index 1), fallback to index 0
+                if len(image_info_array) > 1 and isinstance(image_info_array[1], dict):
+                    image_info = image_info_array[1]
+                elif len(image_info_array) > 0 and isinstance(image_info_array[0], dict):
+                    image_info = image_info_array[0]
+                image_url = str((image_info or {}).get("url") or "").strip()
+                rid = cls._parse_resource_id(image_url)
+                parts.append(f"[image|ybres:{rid}]" if rid else "[image]")
             elif elem_type == "TIMFileElem":
                 filename = content.get("file_name", content.get("fileName", content.get("filename", "")))
-                parts.append(f"[file: {filename}]" if filename else "[file]")
+                file_url = str(content.get("url") or "").strip()
+                rid = cls._parse_resource_id(file_url)
+                if rid:
+                    parts.append(f"[file:{filename}|ybres:{rid}]" if filename else f"[file|ybres:{rid}]")
+                else:
+                    parts.append(f"[file: {filename}]" if filename else "[file]")
             elif elem_type == "TIMSoundElem":
-                parts.append("[voice]")
+                sound_url = str(content.get("url") or "").strip()
+                rid = cls._parse_resource_id(sound_url)
+                parts.append(f"[voice|ybres:{rid}]" if rid else "[voice]")
             elif elem_type == "TIMVideoFileElem":
-                parts.append("[video]")
+                video_url = str(content.get("url") or "").strip()
+                rid = cls._parse_resource_id(video_url)
+                parts.append(f"[video|ybres:{rid}]" if rid else "[video]")
             elif elem_type == "TIMCustomElem":
                 data_val = content.get("data", "")
                 if data_val:

From 80efe664ce5d822b31ca6c76162c6e1f7500796a Mon Sep 17 00:00:00 2001
From: libo1106 <libo1106@foxmail.com>
Date: Sun, 10 May 2026 00:17:13 +0800
Subject: [PATCH 006/218] feat(yuanbao): add quote_media_refs extraction to
 QuoteContextMiddleware

---
 gateway/platforms/yuanbao.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index 68184b6cd29..be296558177 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -925,6 +925,7 @@ class InboundContext:
     # Populated by QuoteContextMiddleware
     reply_to_message_id: Optional[str] = None
     reply_to_text: Optional[str] = None
+    quote_media_refs: list = dc_field(default_factory=list)  # List of (rid, kind, filename)
 
     # Populated by MediaResolveMiddleware
     media_urls: list = dc_field(default_factory=list)
@@ -2172,22 +2173,23 @@ class QuoteContextMiddleware(InboundMiddleware):
     name = "quote-context"
 
     @staticmethod
-    def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str]]:
+    def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str], list]:
         """Extract quote context, mapping to MessageEvent.reply_to_*.
 
         Returns:
-          (reply_to_message_id, reply_to_text)
+          (reply_to_message_id, reply_to_text, quote_media_refs)
+          where quote_media_refs is a list of (rid, kind, filename) tuples
         """
         if not cloud_custom_data:
-            return None, None
+            return None, None, []
         try:
             parsed = json.loads(cloud_custom_data)
         except (json.JSONDecodeError, TypeError):
-            return None, None
+            return None, None, []
 
         quote = parsed.get("quote") if isinstance(parsed, dict) else None
         if not isinstance(quote, dict):
-            return None, None
+            return None, None, []
 
         # type=2 corresponds to image reference; desc may be empty, provide a placeholder.
         quote_type = int(quote.get("type") or 0)
@@ -2195,15 +2197,25 @@ class QuoteContextMiddleware(InboundMiddleware):
         if quote_type == 2 and not desc:
             desc = "[image]"
         if not desc:
-            return None, None
+            return None, None, []
 
         quote_id = str(quote.get("id") or "").strip() or None
         sender = str(quote.get("sender_nickname") or quote.get("sender_id") or "").strip()
         quote_text = f"{sender}: {desc}" if sender else desc
-        return quote_id, quote_text
+
+        # Extract media references from desc using _YB_RES_REF_RE regex
+        media_refs: list = []
+        for m in _YB_RES_REF_RE.finditer(desc):
+            head = m.group(1)  # "image" | "file:<name>" | "voice" | "video"
+            rid = m.group(2)
+            kind, _, filename = head.partition(":")
+            kind = kind.strip()
+            media_refs.append((rid, kind, filename.strip()))
+
+        return quote_id, quote_text, media_refs
 
     async def handle(self, ctx: InboundContext, next_fn) -> None:
-        ctx.reply_to_message_id, ctx.reply_to_text = self._extract_quote_context(ctx.cloud_custom_data)
+        ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data)
         await next_fn()
 
 
From 3df26b925cae7761763e43f03978600d175417c5 Mon Sep 17 00:00:00 2001
From: libo1106 <libo1106@foxmail.com>
Date: Sun, 10 May 2026 00:17:13 +0800
Subject: [PATCH 007/218] feat(yuanbao): prioritize quote media refs over
 history backfill in DispatchMiddleware

---
 gateway/platforms/yuanbao.py | 69 ++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 19 deletions(-)

diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index be296558177..5696e2667d1 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -2510,26 +2510,57 @@ class DispatchMiddleware(InboundMiddleware):
             media_urls = list(ctx.media_urls)
             media_types = list(ctx.media_types)
 
-            # Backfill observed media from recent transcript history
-            extra_img_urls: List[str] = []
-            extra_img_mimes: List[str] = []
-            try:
-                extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media(
-                    adapter, ctx.source,
-                )
-            except Exception as exc:
-                logger.warning(
-                    "[%s] observed-image hydration raised, continuing anyway: %s",
-                    adapter.name, exc,
-                )
-            if extra_img_urls:
-                current = set(media_urls)
-                for u, m in zip(extra_img_urls, extra_img_mimes):
-                    if u in current:
+            # If user quoted a message (reply_to_message_id is set), resolve only
+            # quote_media_refs to avoid injecting unrelated history media.
+            # Otherwise, backfill observed media from recent transcript history.
+            if ctx.reply_to_message_id is not None:
+                # User quoted a message — resolve only media from the quote
+                for rid, kind, filename in ctx.quote_media_refs:
+                    if kind not in ("image", "file"):
                         continue
-                    media_urls.append(u)
-                    media_types.append(m)
-                    current.add(u)
+                    try:
+                        fresh_url = await MediaResolveMiddleware._resolve_by_resource_id(adapter, rid)
+                    except Exception as exc:
+                        logger.warning(
+                            "[%s] quote media resolve failed: rid=%s kind=%s err=%s",
+                            adapter.name, rid, kind, exc,
+                        )
+                        continue
+                    cached = await MediaResolveMiddleware._download_and_cache(
+                        adapter,
+                        fetch_url=fresh_url,
+                        kind=kind,
+                        file_name=filename or None,
+                        log_tag=f"quote rid={rid}",
+                    )
+                    if cached is None:
+                        continue
+                    path, mime = cached
+                    # Avoid duplicates
+                    if path not in media_urls:
+                        media_urls.append(path)
+                        media_types.append(mime)
+            else:
+                # No quote — backfill observed media from recent transcript history
+                extra_img_urls: List[str] = []
+                extra_img_mimes: List[str] = []
+                try:
+                    extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media(
+                        adapter, ctx.source,
+                    )
+                except Exception as exc:
+                    logger.warning(
+                        "[%s] observed-image hydration raised, continuing anyway: %s",
+                        adapter.name, exc,
+                    )
+                if extra_img_urls:
+                    current = set(media_urls)
+                    for u, m in zip(extra_img_urls, extra_img_mimes):
+                        if u in current:
+                            continue
+                        media_urls.append(u)
+                        media_types.append(m)
+                        current.add(u)
 
             # Replace [kind|ybres:xxx] anchors with local cache paths so
             # the transcript records usable paths for the model.

From fc2754dbdff860cdeb8fe4ed5fe0464bb6295cbb Mon Sep 17 00:00:00 2001
From: libo1106 <libo1106@foxmail.com>
Date: Sun, 10 May 2026 01:05:23 +0800
Subject: [PATCH 008/218] fix(yuanbao): resolve quoted file/image via
 transcript lookup when quote desc lacks ybres
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a user quotes a file message (type=3) and @bot, the quote's desc field
only contains the filename without a ybres:// resource reference. The existing
QuoteContextMiddleware only extracted media refs from desc using the ybres regex,
which always returned empty for file quotes.

Fix: add a transcript lookup fallback in QuoteContextMiddleware.handle() —
when quote_media_refs is empty but reply_to_message_id is set, search the
session transcript for the quoted message_id and extract ybres anchors from
its content.

Also fix message_type classification: when quote media resolves non-image files,
override message_type to DOCUMENT so gateway/run.py's document injection logic
properly prepends the file path and content for the agent.
---
 gateway/platforms/yuanbao.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index 5696e2667d1..6c6981c0c2b 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -2216,6 +2216,34 @@ class QuoteContextMiddleware(InboundMiddleware):
 
     async def handle(self, ctx: InboundContext, next_fn) -> None:
         ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data)
+
+        # Fallback: if quote has a message_id but no media_refs extracted from desc,
+        # look up the quoted message in transcript history by message_id to find ybres anchors.
+        if ctx.reply_to_message_id and not ctx.quote_media_refs:
+            store = getattr(ctx.adapter, "_session_store", None)
+            if store:
+                try:
+                    session_entry = store.get_or_create_session(ctx.source)
+                    history = store.load_transcript(session_entry.session_id)
+                    for msg in (history or []):
+                        mid = msg.get("message_id", "")
+                        if mid and mid == ctx.reply_to_message_id:
+                            content = msg.get("content", "")
+                            if isinstance(content, str) and "|ybres:" in content:
+                                for m in _YB_RES_REF_RE.finditer(content):
+                                    head = m.group(1)
+                                    rid = m.group(2)
+                                    kind, _, filename = head.partition(":")
+                                    kind = kind.strip()
+                                    if kind in ("image", "file"):
+                                        ctx.quote_media_refs.append((rid, kind, filename.strip()))
+                            break
+                except Exception as exc:
+                    logger.warning(
+                        "[%s] QuoteContext transcript lookup failed: %s",
+                        ctx.adapter.name, exc,
+                    )
+
         await next_fn()
 
 
@@ -2589,7 +2617,11 @@ class DispatchMiddleware(InboundMiddleware):
 
             event = MessageEvent(
                 text=_patched_event_text,
-                message_type=ctx.msg_type,
+                message_type=(
+                    MessageType.DOCUMENT
+                    if any(not mt.startswith("image/") for mt in media_types)
+                    else ctx.msg_type
+                ),
                 source=ctx.source,
                 message_id=ctx.msg_id or None,
                 raw_message=ctx.push,

From 0086cdaf93b2a85abe787fc9b130e45c0b8b8388 Mon Sep 17 00:00:00 2001
From: libo1106 <libo1106@foxmail.com>
Date: Sun, 10 May 2026 01:47:36 +0800
Subject: [PATCH 009/218] =?UTF-8?q?refactor(yuanbao):=20improve=20quote=20?=
 =?UTF-8?q?media=20fallback=20=E2=80=94=20move=20to=20DispatchMiddleware,?=
 =?UTF-8?q?=20tighten=20conditions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 gateway/platforms/yuanbao.py | 63 ++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index 6c6981c0c2b..7015e0c848c 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -147,6 +147,9 @@ _YB_RES_REF_RE = re.compile(
     r"\[(image|voice|video|file(?::[^|\]]*)?)\|ybres:([A-Za-z0-9_\-]+)\]"
 )
 
+# Media kinds that can be resolved and injected into the model context
+_RESOLVABLE_MEDIA_KINDS = frozenset({"image", "file"})
+
 # Strip page indicators like (1/3) appended by BasePlatformAdapter
 _INDICATOR_RE = re.compile(r'\s*\(\d+/\d+\)$')
 
@@ -2217,33 +2220,6 @@ class QuoteContextMiddleware(InboundMiddleware):
     async def handle(self, ctx: InboundContext, next_fn) -> None:
         ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data)
 
-        # Fallback: if quote has a message_id but no media_refs extracted from desc,
-        # look up the quoted message in transcript history by message_id to find ybres anchors.
-        if ctx.reply_to_message_id and not ctx.quote_media_refs:
-            store = getattr(ctx.adapter, "_session_store", None)
-            if store:
-                try:
-                    session_entry = store.get_or_create_session(ctx.source)
-                    history = store.load_transcript(session_entry.session_id)
-                    for msg in (history or []):
-                        mid = msg.get("message_id", "")
-                        if mid and mid == ctx.reply_to_message_id:
-                            content = msg.get("content", "")
-                            if isinstance(content, str) and "|ybres:" in content:
-                                for m in _YB_RES_REF_RE.finditer(content):
-                                    head = m.group(1)
-                                    rid = m.group(2)
-                                    kind, _, filename = head.partition(":")
-                                    kind = kind.strip()
-                                    if kind in ("image", "file"):
-                                        ctx.quote_media_refs.append((rid, kind, filename.strip()))
-                            break
-                except Exception as exc:
-                    logger.warning(
-                        "[%s] QuoteContext transcript lookup failed: %s",
-                        ctx.adapter.name, exc,
-                    )
-
         await next_fn()
 
 
@@ -2412,7 +2388,7 @@ class MediaResolveMiddleware(InboundMiddleware):
         for ref in media_refs:
             kind = str(ref.get("kind") or "").strip().lower()
             url = str(ref.get("url") or "").strip()
-            if kind not in {"image", "file"} or not url:
+            if kind not in _RESOLVABLE_MEDIA_KINDS or not url:
                 continue
 
             try:
@@ -2471,7 +2447,7 @@ class MediaResolveMiddleware(InboundMiddleware):
                 rid = m.group(2)
                 kind, _, filename = head.partition(":")
                 kind = kind.strip()
-                if kind not in {"image", "file"}:
+                if kind not in _RESOLVABLE_MEDIA_KINDS:
                     continue
                 if rid in seen:
                     continue
@@ -2542,9 +2518,34 @@ class DispatchMiddleware(InboundMiddleware):
             # quote_media_refs to avoid injecting unrelated history media.
             # Otherwise, backfill observed media from recent transcript history.
             if ctx.reply_to_message_id is not None:
+                # Fallback: if desc didn't contain ybres refs, look up transcript
+                if not ctx.quote_media_refs:
+                    try:
+                        store = getattr(adapter, "_session_store", None)
+                        if store:
+                            session_entry = store.get_or_create_session(ctx.source)
+                            history = store.load_transcript(session_entry.session_id)
+                            for msg in reversed(history or []):
+                                mid = msg.get("message_id", "")
+                                if mid and mid == ctx.reply_to_message_id:
+                                    _content = msg.get("content", "")
+                                    if isinstance(_content, str) and "|ybres:" in _content:
+                                        for m in _YB_RES_REF_RE.finditer(_content):
+                                            head = m.group(1)
+                                            rid = m.group(2)
+                                            kind, _, filename = head.partition(":")
+                                            kind = kind.strip()
+                                            if kind in _RESOLVABLE_MEDIA_KINDS:
+                                                ctx.quote_media_refs.append((rid, kind, filename.strip()))
+                                    break
+                    except Exception as exc:
+                        logger.warning(
+                            "[%s] quote transcript lookup failed: %s",
+                            adapter.name, exc,
+                        )
                 # User quoted a message — resolve only media from the quote
                 for rid, kind, filename in ctx.quote_media_refs:
-                    if kind not in ("image", "file"):
+                    if kind not in _RESOLVABLE_MEDIA_KINDS:
                         continue
                     try:
                         fresh_url = await MediaResolveMiddleware._resolve_by_resource_id(adapter, rid)
@@ -2619,7 +2620,7 @@ class DispatchMiddleware(InboundMiddleware):
                 text=_patched_event_text,
                 message_type=(
                     MessageType.DOCUMENT
-                    if any(not mt.startswith("image/") for mt in media_types)
+                    if any(mt.startswith(("application/", "text/")) for mt in media_types)
                     else ctx.msg_type
                 ),
                 source=ctx.source,

From e0e4856d466491ee8a31378c606e65ddfe061ab9 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 15 May 2026 01:20:24 -0700
Subject: [PATCH 010/218] feat(skills-hub): add huggingface/skills as trusted
 default tap (#2549)

Adds Hugging Face's official skill catalog to the default GitHub taps and
classifies it as a trusted source alongside openai/skills and anthropics/skills.

- tools/skills_guard.py: huggingface/skills -> TRUSTED_REPOS
- tools/skills_hub.py: GitHubSource.DEFAULT_TAPS += huggingface/skills (skills/)
- website/docs: list it under default taps + trusted-source examples

Closes #2549.

Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com>
---
 tools/skills_guard.py                           | 2 +-
 tools/skills_hub.py                             | 1 +
 website/docs/developer-guide/creating-skills.md | 2 +-
 website/docs/user-guide/features/skills.md      | 3 ++-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/skills_guard.py b/tools/skills_guard.py
index 363e983da1a..1610c3225cb 100644
--- a/tools/skills_guard.py
+++ b/tools/skills_guard.py
@@ -36,7 +36,7 @@ from typing import List, Tuple
 # Hardcoded trust configuration
 # ---------------------------------------------------------------------------
 
-TRUSTED_REPOS = {"openai/skills", "anthropics/skills"}
+TRUSTED_REPOS = {"openai/skills", "anthropics/skills", "huggingface/skills"}
 
 INSTALL_POLICY = {
     #                  safe      caution    dangerous
diff --git a/tools/skills_hub.py b/tools/skills_hub.py
index 3e2c27c338a..35cec56e08e 100644
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@@ -329,6 +329,7 @@ class GitHubSource(SkillSource):
     DEFAULT_TAPS = [
         {"repo": "openai/skills", "path": "skills/"},
         {"repo": "anthropics/skills", "path": "skills/"},
+        {"repo": "huggingface/skills", "path": "skills/"},
         {"repo": "VoltAgent/awesome-agent-skills", "path": "skills/"},
         {"repo": "garrytan/gstack", "path": ""},
         {"repo": "MiniMax-AI/cli", "path": "skill/"},
diff --git a/website/docs/developer-guide/creating-skills.md b/website/docs/developer-guide/creating-skills.md
index 43f088a9a35..7496c661d48 100644
--- a/website/docs/developer-guide/creating-skills.md
+++ b/website/docs/developer-guide/creating-skills.md
@@ -360,7 +360,7 @@ All hub-installed skills go through a security scanner that checks for:
 Trust levels:
 - `builtin` — ships with Hermes (always trusted)
 - `official` — from `optional-skills/` in the repo (builtin trust, no third-party warning)
-- `trusted` — from openai/skills, anthropics/skills
+- `trusted` — from openai/skills, anthropics/skills, huggingface/skills
 - `community` — non-dangerous findings can be overridden with `--force`; `dangerous` verdicts remain blocked
 
 Hermes can now consume third-party skills from multiple external discovery models:
diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md
index 9499e15d806..9959bcce112 100644
--- a/website/docs/user-guide/features/skills.md
+++ b/website/docs/user-guide/features/skills.md
@@ -351,6 +351,7 @@ Hermes can install directly from GitHub repositories and GitHub-based taps. This
 Default taps (browsable without any setup):
 - [openai/skills](https://github.com/openai/skills)
 - [anthropics/skills](https://github.com/anthropics/skills)
+- [huggingface/skills](https://github.com/huggingface/skills)
 - [VoltAgent/awesome-agent-skills](https://github.com/VoltAgent/awesome-agent-skills)
 - [garrytan/gstack](https://github.com/garrytan/gstack)
 
@@ -445,7 +446,7 @@ Important behavior:
 |-------|--------|--------|
 | `builtin` | Ships with Hermes | Always trusted |
 | `official` | `optional-skills/` in the repo | Builtin trust, no third-party warning |
-| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills` | More permissive policy than community sources |
+| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills`, `huggingface/skills` | More permissive policy than community sources |
 | `community` | Everything else (`skills.sh`, well-known endpoints, custom GitHub repos, most marketplaces) | Non-dangerous findings can be overridden with `--force`; `dangerous` verdicts stay blocked |
 
 ### Update lifecycle

From e0e7397c32fa06e4c93ce07bc276ea5c1dca7a84 Mon Sep 17 00:00:00 2001
From: teyrebaz33 <hakanerten02@hotmail.com>
Date: Sun, 22 Mar 2026 23:54:02 +0300
Subject: [PATCH 011/218] fix(session): persist auto-reset state across gateway
 restarts

was_auto_reset, auto_reset_reason, and reset_had_activity were not
included in SessionEntry.to_dict() / from_dict(), so a gateway restart
between session expiry and the user's next message would silently drop
the auto-reset notification and context note.

Add the three fields to the serialization roundtrip with safe defaults
(False / None / False) so existing sessions.json files load cleanly.

Add three roundtrip tests to test_session_reset_notify.py.
---
 gateway/session.py                         |  6 ++
 tests/gateway/test_session_reset_notify.py | 75 ++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/gateway/session.py b/gateway/session.py
index ac6f95eec63..dfa2ca9651d 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -518,6 +518,9 @@ class SessionEntry:
                 else None
             ),
             "is_fresh_reset": self.is_fresh_reset,
+            "was_auto_reset": self.was_auto_reset,
+            "auto_reset_reason": self.auto_reset_reason,
+            "reset_had_activity": self.reset_had_activity,
         }
         if self.origin:
             result["origin"] = self.origin.to_dict()
@@ -567,6 +570,9 @@ class SessionEntry:
             resume_reason=data.get("resume_reason"),
             last_resume_marked_at=last_resume_marked_at,
             is_fresh_reset=data.get("is_fresh_reset", False),
+            was_auto_reset=data.get("was_auto_reset", False),
+            auto_reset_reason=data.get("auto_reset_reason"),
+            reset_had_activity=data.get("reset_had_activity", False),
         )
 
 
diff --git a/tests/gateway/test_session_reset_notify.py b/tests/gateway/test_session_reset_notify.py
index 87903921fbd..a4e9d71d0f8 100644
--- a/tests/gateway/test_session_reset_notify.py
+++ b/tests/gateway/test_session_reset_notify.py
@@ -205,3 +205,78 @@ class TestResetPolicyNotify:
         assert restored.notify == original.notify
         assert restored.notify_exclude_platforms == original.notify_exclude_platforms
         assert restored.mode == original.mode
+
+
+# ---------------------------------------------------------------------------
+# SessionEntry to_dict / from_dict roundtrip for auto-reset fields
+# ---------------------------------------------------------------------------
+
+class TestSessionEntryAutoResetRoundtrip:
+    def test_was_auto_reset_persists_across_roundtrip(self, tmp_path):
+        """was_auto_reset=True survives to_dict() → from_dict() (gateway restart)."""
+        store = _make_store(
+            SessionResetPolicy(mode="idle", idle_minutes=1),
+            tmp_path,
+        )
+        source = _make_source()
+
+        entry = store.get_or_create_session(source)
+        entry.updated_at = datetime.now() - timedelta(minutes=5)
+        store._save()
+
+        entry2 = store.get_or_create_session(source)
+        assert entry2.was_auto_reset is True
+        assert entry2.auto_reset_reason == "idle"
+        assert entry2.session_id != entry.session_id
+
+        # Simulate gateway restart: reload from disk
+        store._loaded = False
+        store._entries.clear()
+        store._ensure_loaded()
+
+        reloaded = store._entries.get(entry2.session_key)
+        assert reloaded is not None
+        assert reloaded.was_auto_reset is True
+        assert reloaded.auto_reset_reason == "idle"
+
+    def test_reset_had_activity_persists_across_roundtrip(self, tmp_path):
+        """reset_had_activity survives to_dict() → from_dict() (gateway restart)."""
+        store = _make_store(
+            SessionResetPolicy(mode="idle", idle_minutes=1),
+            tmp_path,
+        )
+        source = _make_source()
+
+        entry = store.get_or_create_session(source)
+        entry.total_tokens = 1000
+        entry.updated_at = datetime.now() - timedelta(minutes=5)
+        store._save()
+
+        entry2 = store.get_or_create_session(source)
+        assert entry2.reset_had_activity is True
+
+        store._loaded = False
+        store._entries.clear()
+        store._ensure_loaded()
+
+        reloaded = store._entries.get(entry2.session_key)
+        assert reloaded is not None
+        assert reloaded.reset_had_activity is True
+
+    def test_auto_reset_reason_none_roundtrip(self, tmp_path):
+        """auto_reset_reason=None (no reset) survives roundtrip cleanly."""
+        store = _make_store(tmp_path=tmp_path)
+        source = _make_source()
+
+        entry = store.get_or_create_session(source)
+        assert entry.was_auto_reset is False
+
+        store._loaded = False
+        store._entries.clear()
+        store._ensure_loaded()
+
+        reloaded = store._entries.get(entry.session_key)
+        assert reloaded is not None
+        assert reloaded.was_auto_reset is False
+        assert reloaded.auto_reset_reason is None
+        assert reloaded.reset_had_activity is False

From 23ac522d3711ea0735f11f4d8f6131ac24554dd3 Mon Sep 17 00:00:00 2001
From: KiraKatana <kira.ops@proton.me>
Date: Fri, 15 May 2026 01:24:44 -0700
Subject: [PATCH 012/218] fix(gateway): isinstance-guard string-form 429 error
 body

When a non-Anthropic provider (e.g. Morpheus proxy) returns a 429 with
`{"error": "Too Many Requests"}` instead of the expected
`{"error": {"type": ...}}` dict, _err_body.json().get("error", {})
returns the raw string and the next .get("type") line crashes with
AttributeError, taking down the message handler.

Guard with isinstance(_err_json, dict) so non-dict error bodies fall
through to the generic rate-limit hint.

Salvaged from PR #2587 by @KiraKatana. The PR's fallback-config
`base_url`/`api_key_env` fix was already implemented independently
on main (run_agent.py:8759-8780) with additional aliases and Ollama
Cloud host handling, so only the gateway guard is cherry-picked.

Co-authored-by: KiraKatana <kira.ops@proton.me>
---
 gateway/run.py     | 2 ++
 scripts/release.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/gateway/run.py b/gateway/run.py
index d986917ebab..5e8fce8e18d 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -7991,6 +7991,8 @@ class GatewayRunner:
                 try:
                     if _err_body is not None:
                         _err_json = _err_body.json().get("error", {})
+                        if not isinstance(_err_json, dict):
+                            _err_json = {}
                 except Exception:
                     pass
                 if _err_json.get("type") == "usage_limit_reached":
diff --git a/scripts/release.py b/scripts/release.py
index 53db4bbec2c..47cb78edff8 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -637,6 +637,7 @@ AUTHOR_MAP = {
     "skmishra1991@gmail.com": "bugkill3r",
     "karamusti912@gmail.com": "MustafaKara7",
     "kira@ariaki.me": "kira-ariaki",
+    "kira.ops@proton.me": "KiraKatana",
     "knopki@duck.com": "knopki",
     "limars874@gmail.com": "limars874",
     "lisicheng168@gmail.com": "lesterli",

From 814c60092b08df3e4f7ccfcc0bab4e1fbaa39414 Mon Sep 17 00:00:00 2001
From: CoinTheHat <63822243+CoinTheHat@users.noreply.github.com>
Date: Mon, 23 Mar 2026 14:23:32 +0300
Subject: [PATCH 013/218] fix: clean stale conversation mappings on response
 eviction/deletion

ResponseStore.put() and .delete() now remove conversations rows that
reference evicted or deleted response IDs, preventing 404 errors when
a conversation name is reused after its backing response was purged.

Adds regression tests for delete, eviction, and handler-level reuse.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 gateway/platforms/api_server.py  | 29 ++++++++++++---
 tests/gateway/test_api_server.py | 62 ++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index 8b53db3a99f..809d6cd8a03 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -356,15 +356,34 @@ class ResponseStore:
         # Evict oldest entries beyond max_size
         count = self._conn.execute("SELECT COUNT(*) FROM responses").fetchone()[0]
         if count > self._max_size:
-            self._conn.execute(
-                "DELETE FROM responses WHERE response_id IN "
-                "(SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?)",
-                (count - self._max_size,),
-            )
+            # Collect IDs that will be evicted
+            evict_ids = [
+                row[0]
+                for row in self._conn.execute(
+                    "SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?",
+                    (count - self._max_size,),
+                ).fetchall()
+            ]
+            if evict_ids:
+                placeholders = ",".join("?" for _ in evict_ids)
+                # Clear conversation mappings pointing to evicted responses
+                self._conn.execute(
+                    f"DELETE FROM conversations WHERE response_id IN ({placeholders})",
+                    evict_ids,
+                )
+                # Delete evicted responses
+                self._conn.execute(
+                    f"DELETE FROM responses WHERE response_id IN ({placeholders})",
+                    evict_ids,
+                )
         self._conn.commit()
 
     def delete(self, response_id: str) -> bool:
         """Remove a response from the store. Returns True if found and deleted."""
+        # Clear conversation mappings pointing to this response
+        self._conn.execute(
+            "DELETE FROM conversations WHERE response_id = ?", (response_id,)
+        )
         cursor = self._conn.execute(
             "DELETE FROM responses WHERE response_id = ?", (response_id,)
         )
diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py
index 66b304fff51..032af7109a5 100644
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -105,6 +105,29 @@ class TestResponseStore:
         store = ResponseStore(max_size=10)
         assert store.delete("resp_missing") is False
 
+    def test_delete_clears_conversation_mapping(self):
+        """Deleting a response also removes conversation mappings that reference it."""
+        store = ResponseStore(max_size=10)
+        store.put("resp_1", {"output": "hello"})
+        store.set_conversation("chat-a", "resp_1")
+        assert store.get_conversation("chat-a") == "resp_1"
+        store.delete("resp_1")
+        assert store.get_conversation("chat-a") is None
+
+    def test_eviction_clears_conversation_mapping(self):
+        """LRU eviction also removes conversation mappings for evicted responses."""
+        store = ResponseStore(max_size=2)
+        store.put("resp_1", {"output": "one"})
+        store.set_conversation("chat-a", "resp_1")
+        store.put("resp_2", {"output": "two"})
+        store.set_conversation("chat-b", "resp_2")
+        # Adding a 3rd should evict resp_1 and its conversation mapping
+        store.put("resp_3", {"output": "three"})
+        assert store.get("resp_1") is None
+        assert store.get_conversation("chat-a") is None
+        # resp_2 mapping should still be intact
+        assert store.get_conversation("chat-b") == "resp_2"
+
 
 # ---------------------------------------------------------------------------
 # _IdempotencyCache
@@ -2870,6 +2893,45 @@ class TestConversationParameter:
                 # Conversation mapping should NOT be set since store=false
                 assert adapter._response_store.get_conversation("ephemeral-chat") is None
 
+    @pytest.mark.asyncio
+    async def test_conversation_reuse_after_eviction_no_404(self, adapter):
+        """After eviction clears a conversation mapping, reusing that name starts fresh (no 404)."""
+        adapter._response_store = ResponseStore(max_size=1)
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (
+                    {"final_response": "First", "messages": [], "api_calls": 1},
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+                # Create conversation -> resp stored
+                resp1 = await cli.post("/v1/responses", json={
+                    "input": "hello",
+                    "conversation": "my-chat",
+                })
+                assert resp1.status == 200
+
+                # Evict by adding another response
+                mock_run.return_value = (
+                    {"final_response": "Other", "messages": [], "api_calls": 1},
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+                await cli.post("/v1/responses", json={"input": "other"})
+
+                # Conversation mapping should have been cleaned by eviction
+                assert adapter._response_store.get_conversation("my-chat") is None
+
+                # Reuse conversation name — should start fresh, not 404
+                mock_run.return_value = (
+                    {"final_response": "Restarted", "messages": [], "api_calls": 1},
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+                resp3 = await cli.post("/v1/responses", json={
+                    "input": "hello again",
+                    "conversation": "my-chat",
+                })
+                assert resp3.status == 200
+
 
 # ---------------------------------------------------------------------------
 # X-Hermes-Session-Id header (session continuity)

From 0161d4bb6ce3154e2cdd8ce54d43273cf457840f Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:27:31 -0700
Subject: [PATCH 014/218] chore(release): add AUTHOR_MAP entry for CoinTheHat

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 47cb78edff8..10d67f3e708 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -74,6 +74,7 @@ AUTHOR_MAP = {
     "1095245867@qq.com": "littlewwwhite",
     "db@project-aeon.com": "db-aeon",
     "ahmed@abadr.net": "ahmedbadr3",
+    "63822243+CoinTheHat@users.noreply.github.com": "CoinTheHat",
     "cleo@edaphic.xyz": "curiouscleo",
     "hirokazu.ogawa@kwansei.ac.jp": "hrkzogw",
     "datapod.k@gmail.com": "dandacompany",

From 681778a0b753bac894bd30b1d257bcb3eface63d Mon Sep 17 00:00:00 2001
From: Wysie <wysie@users.noreply.github.com>
Date: Fri, 15 May 2026 01:29:43 -0700
Subject: [PATCH 015/218] fix(whatsapp): fail fast when Baileys sendMessage
 hangs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Baileys' sock.sendMessage() can hang indefinitely while uploading
media to WhatsApp servers (and, less often, on text sends), pinning
the bridge's Express handler until the gateway's aiohttp timeout
fires — surfacing to the user as a 120s wait followed by an empty
error from the TTS/voice path.

Wrap every sock.sendMessage() call inside the bridge in a
sendWithTimeout() helper that rejects after WHATSAPP_SEND_TIMEOUT_MS
(default 60s) via Promise.race. The four call sites are /send,
/edit, and /send-media's primary send. Express handlers catch the
rejection in their existing try/catch and return a real 500 to the
gateway, which can then surface a retryable error.

Salvaged from #2608 — wysie diagnosed the hang and the
Promise.race shape; the other two parts of that PR (gateway HTTP
session pooling, base.py metadata kwarg removal) already landed on
main via separate routes and are no longer needed.

Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
---
 scripts/whatsapp-bridge/bridge.js | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js
index 9ff64471e56..5723d8b543b 100644
--- a/scripts/whatsapp-bridge/bridge.js
+++ b/scripts/whatsapp-bridge/bridge.js
@@ -57,11 +57,28 @@ const REPLY_PREFIX = process.env.WHATSAPP_REPLY_PREFIX === undefined
   : process.env.WHATSAPP_REPLY_PREFIX.replace(/\\n/g, '\n');
 const MAX_MESSAGE_LENGTH = parseInt(process.env.WHATSAPP_MAX_MESSAGE_LENGTH || '4096', 10);
 const CHUNK_DELAY_MS = parseInt(process.env.WHATSAPP_CHUNK_DELAY_MS || '300', 10);
+// Per-call timeout for sock.sendMessage(). Baileys occasionally hangs forever
+// when uploading media to WhatsApp servers (and, less often, on text sends),
+// which pins the bridge's HTTP handler until the upstream aiohttp timeout
+// fires. Fail fast instead so the gateway can surface a real error and retry.
+const SEND_TIMEOUT_MS = parseInt(process.env.WHATSAPP_SEND_TIMEOUT_MS || '60000', 10);
 
 function sleep(ms) {
   return new Promise(resolve => setTimeout(resolve, ms));
 }
 
+function sendWithTimeout(chatId, payload, timeoutMs = SEND_TIMEOUT_MS) {
+  let timer;
+  const timeoutPromise = new Promise((_, reject) => {
+    timer = setTimeout(
+      () => reject(new Error(`sendMessage timed out after ${timeoutMs / 1000}s`)),
+      timeoutMs,
+    );
+  });
+  return Promise.race([sock.sendMessage(chatId, payload), timeoutPromise])
+    .finally(() => clearTimeout(timer));
+}
+
 function formatOutgoingMessage(message) {
   // In bot mode, messages come from a different number so the prefix is
   // redundant — the sender identity is already clear.  Only prepend in
@@ -487,7 +504,7 @@ app.post('/send', async (req, res) => {
     const chunks = splitLongMessage(formatOutgoingMessage(message));
     const messageIds = [];
     for (let i = 0; i < chunks.length; i += 1) {
-      const sent = await sock.sendMessage(chatId, { text: chunks[i] });
+      const sent = await sendWithTimeout(chatId, { text: chunks[i] });
       trackSentMessageId(sent);
       if (sent?.key?.id) messageIds.push(sent.key.id);
       if (chunks.length > 1 && i < chunks.length - 1) {
@@ -521,10 +538,10 @@ app.post('/edit', async (req, res) => {
     const chunks = splitLongMessage(formatOutgoingMessage(message));
     const messageIds = [];
 
-    await sock.sendMessage(chatId, { text: chunks[0], edit: key });
+    await sendWithTimeout(chatId, { text: chunks[0], edit: key });
     if (chunks.length > 1) {
       for (let i = 1; i < chunks.length; i += 1) {
-        const sent = await sock.sendMessage(chatId, { text: chunks[i] });
+        const sent = await sendWithTimeout(chatId, { text: chunks[i] });
         trackSentMessageId(sent);
         if (sent?.key?.id) messageIds.push(sent.key.id);
         if (i < chunks.length - 1) {
@@ -625,7 +642,7 @@ app.post('/send-media', async (req, res) => {
         break;
     }
 
-    const sent = await sock.sendMessage(chatId, msgPayload);
+    const sent = await sendWithTimeout(chatId, msgPayload);
 
     trackSentMessageId(sent);
 

From 04b1fdaecfda15ff4c8f5c9f0041516efd01ba30 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Fri, 15 May 2026 14:03:08 +0530
Subject: [PATCH 016/218] security(deps): add upper bounds to 5 loose deps +
 document supply chain policy (#24226)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After the Mini Shai-Hulud supply chain campaign (May 2026) and the litellm
compromise (March 2026), codify the dependency pinning policy that was
established in PRs #2810 and #9801 but never written down for contributors.

Changes:
- pyproject.toml: Add tight upper bounds to the 5 deps that slipped
  through as review escapes from external contributor PRs:
  - hindsight-client>=0.4.22,<0.5 (was >=0.4.22)
  - aiosqlite>=0.20,<0.23 (was >=0.20)
  - asyncpg>=0.29,<0.32 (was >=0.29)
  - alibabacloud-dingtalk>=2.0.0,<3 (was >=2.0.0)
  - youtube-transcript-api>=1.2.0,<2 (was >=1.2.0)

  Pre-1.0 packages get <0.(current_minor+2) — tight enough to block
  hostile minor releases but loose enough to not require bumps every week.

- CONTRIBUTING.md: Add 'Dependency pinning policy' section under Security
  with the full rationale, table of source types + treatments, and examples.

- AGENTS.md: Add concise 'Dependency Pinning Policy' section for AI coding
  agents with the decision table and step-by-step checklist.

- supply-chain-audit.yml: Add dep-bounds job that fails PRs introducing
  PyPI deps without <ceiling upper bounds. Fires on pyproject.toml changes.
  Posts a PR comment with the specific unbounded specs found.

Refs: #2796 #2810 #9801 #24205
---
 .github/workflows/supply-chain-audit.yml | 66 ++++++++++++++++++++++++
 AGENTS.md                                | 23 +++++++++
 CONTRIBUTING.md                          | 41 +++++++++++++++
 3 files changed, 130 insertions(+)

diff --git a/.github/workflows/supply-chain-audit.yml b/.github/workflows/supply-chain-audit.yml
index 417e7b21f84..69a9a115c87 100644
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -11,6 +11,7 @@ on:
       - '**/sitecustomize.py'
       - '**/usercustomize.py'
       - '**/__init__.pth'
+      - 'pyproject.toml'
 
 permissions:
   pull-requests: write
@@ -137,3 +138,68 @@ jobs:
         run: |
           echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
           exit 1
+
+  dep-bounds:
+    name: Check PyPI dependency upper bounds
+    runs-on: ubuntu-latest
+    if: contains(github.event.pull_request.changed_files_url, 'pyproject.toml') || true
+    steps:
+      - name: Checkout
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          fetch-depth: 0
+
+      - name: Check for unbounded PyPI deps
+        id: bounds
+        run: |
+          set -euo pipefail
+
+          BASE="${{ github.event.pull_request.base.sha }}"
+          HEAD="${{ github.event.pull_request.head.sha }}"
+
+          # Only check added lines in pyproject.toml
+          ADDED=$(git diff "$BASE".."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true)
+
+          if [ -z "$ADDED" ]; then
+            echo "found=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Match PyPI dep specs that have >= but no < ceiling.
+          # Pattern: "package>=version" without a following ",<" bound.
+          # Excludes git+ URLs (which use commit SHAs) and comments.
+          UNBOUNDED=$(echo "$ADDED" | grep -oE '"[a-zA-Z0-9_-]+(\[[^\]]*\])?>=[ 0-9.]+"' | grep -v ',<' || true)
+
+          if [ -n "$UNBOUNDED" ]; then
+            echo "found=true" >> "$GITHUB_OUTPUT"
+            echo "$UNBOUNDED" > /tmp/unbounded.txt
+          else
+            echo "found=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Post unbounded dep warning
+        if: steps.bounds.outputs.found == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          BODY="## ⚠️ Unbounded PyPI Dependency Detected
+
+          This PR adds PyPI dependencies without a \`<next_major\` upper bound. Per our [supply chain policy](../blob/main/CONTRIBUTING.md#dependency-pinning-policy-supply-chain-hardening), all PyPI deps must be pinned as \`>=floor,<next_major\`.
+
+          **Unbounded specs found:**
+          \`\`\`
+          $(cat /tmp/unbounded.txt)
+          \`\`\`
+
+          **Fix:** Add an upper bound, e.g. \`\"package>=1.2.0,<2\"\`
+
+          ---
+          *See PR #2810 and CONTRIBUTING.md for the full policy rationale.*"
+
+          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
+
+      - name: Fail on unbounded deps
+        if: steps.bounds.outputs.found == 'true'
+        run: |
+          echo "::error::PyPI dependencies without upper bounds detected. Add <next_major ceiling per CONTRIBUTING.md policy."
+          exit 1
diff --git a/AGENTS.md b/AGENTS.md
index d5d32f99c3d..7c324f50332 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -308,6 +308,29 @@ The registry handles schema collection, dispatch, availability checking, and err
 
 ---
 
+## Dependency Pinning Policy
+
+All dependencies must have upper bounds to limit supply-chain attack surface.
+This policy was established after the litellm compromise (PR #2796, #2810) and
+reinforced after the Mini Shai-Hulud worm campaign (May 2026).
+
+| Source type | Treatment | Example |
+|---|---|---|
+| PyPI package | `>=floor,<next_major` | `"httpx>=0.28.1,<1"` |
+| Git URL | Commit SHA | `git+https://...@<40-char-sha>` |
+| GitHub Actions | Commit SHA + comment | `uses: actions/checkout@<sha>  # v4` |
+| CI-only pip | `==exact` | `pyyaml==6.0.2` |
+
+**When adding a new dependency to `pyproject.toml`:**
+1. Pin to `>=current_version,<next_major` for post-1.0 (e.g. `>=1.5.0,<2`).
+2. For pre-1.0 packages, use `<0.(current_minor + 2)` (e.g. `>=0.29,<0.32`).
+3. Never commit a bare `>=X.Y.Z` without a ceiling — CI and reviewers will reject it.
+4. Run `uv lock` to regenerate `uv.lock` with hashes.
+
+Reference: #2810 (bounds pass), #9801 (SHA pinning + audit CI).
+
+---
+
 ## Adding Configuration
 
 ### config.yaml options:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9cbc26112f6..36b1e9df2d5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -800,6 +800,47 @@ Hermes has terminal access. Security matters.
 
 If your PR affects security, note it explicitly in the description.
 
+### Dependency pinning policy (supply chain hardening)
+
+After the [litellm supply chain compromise](https://github.com/BerriAI/litellm/issues/24512) in March 2026 and the [Mini Shai-Hulud worm campaign](https://socket.dev/blog/tanstack-npm-packages-compromised-mini-shai-hulud-supply-chain-attack) in May 2026, all dependencies must follow these rules:
+
+| Source type | Required treatment | Rationale |
+|---|---|---|
+| **PyPI package** | `>=floor,<next_major` | PyPI versions are immutable once published, but new versions can be pushed into your range. A `<next_major` ceiling stops a 1.x install from upgrading to a malicious 2.0.0. |
+| **Git URL** (atroposlib, tinker, yc-bench, Baileys) | Full commit SHA | Branches and tags are mutable refs; SHA is content-addressed. |
+| **GitHub Actions** | Full commit SHA + version comment | Action tags are mutable refs (e.g. tj-actions/changed-files March 2025). Pin as `uses: owner/action@<sha>  # vX.Y.Z` |
+| **CI-only pip installs** | `==exact` | Hermetic CI builds; churn is acceptable. |
+
+**Every new PyPI dependency in a PR must have a `<next_major` upper bound.** PRs adding unbounded `>=X.Y.Z` specs will be rejected by reviewers. The `supply-chain-audit.yml` CI workflow also flags dependency manifest changes for manual review.
+
+**How to determine the ceiling:**
+- If the package is at version `1.x.y`, use `<2`.
+- If the package is at version `0.x.y` (pre-1.0), use `<0.(current_minor + 2)` — e.g. if current is `0.29.x`, use `<0.32`. This gives ~2 minor versions of headroom while keeping the window small enough that a hostile takeover version is unlikely to land inside it.
+- Exception: packages with very stable APIs (e.g. `aiohttp-socks`) can use `<1` at reviewer discretion.
+
+**Examples:**
+```toml
+# ✅ Correct — post-1.0
+"openai>=2.21.0,<3"
+"pydantic>=2.12.5,<3"
+
+# ✅ Correct — pre-1.0 (tight minor window)
+"asyncpg>=0.29,<0.32"
+"aiosqlite>=0.20,<0.23"
+"hindsight-client>=0.4.22,<0.5"
+
+# ❌ Rejected — no upper bound
+"some-package>=1.2.3"
+
+# ❌ Rejected — too tight (blocks legitimate patches)
+"some-package==1.2.3"
+
+# ❌ Rejected — too loose for pre-1.0 (allows 80 minor versions)
+"some-package>=0.20,<1"
+```
+
+**Reference PRs:** #2796 (litellm removal), #2810 (upper bounds pass), #9801 (SHA pinning + supply-chain-audit CI).
+
 ---
 
 ## Pull Request Process

From 9329e06696c968b7a960541d0ee0167df6742f21 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:33:13 -0700
Subject: [PATCH 017/218] feat(image-gen): actionable setup message when no FAL
 backend is reachable (#26222)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the in-tree FAL path has no API key (and no managed gateway), the
handler used to return a bare 'FAL_KEY environment variable not set'
error. Users had no idea where to get a key, that a managed Nous
gateway exists, or that plugin-registered providers are an option.

Now `image_generate_tool` returns a structured multi-line message:
  - signup link (https://fal.ai)
  - managed-gateway status (if Nous tools are enabled)
  - pointer to `hermes tools` / `hermes plugins list` for alternate
    backends, so users on a stale `image_gen.provider` know where to look

The schema is untouched — `check_fn` still gates the tool out of the
schema when no backend is reachable at startup, consistent with every
other conditional tool. This patch fixes the call-time failure modes:
managed-gateway 5xx, plugin provider disappearing mid-session, etc.

Inspired by #2546 / @Mibayy. The PR was ~5700 commits stale against
the new plugin-aware image_gen architecture, so this is a forward port
of the actionable-error idea rather than a cherry-pick.


Closes #2543

Co-authored-by: Mibayy <mibayy@users.noreply.github.com>
---
 tests/tools/test_image_generation_env.py | 59 ++++++++++++++++++++++++
 tools/image_generation_tool.py           | 41 ++++++++++++++--
 2 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/tests/tools/test_image_generation_env.py b/tests/tools/test_image_generation_env.py
index fc4e6553346..56c9741617f 100644
--- a/tests/tools/test_image_generation_env.py
+++ b/tests/tools/test_image_generation_env.py
@@ -37,3 +37,62 @@ def test_fal_key_empty_is_unset(monkeypatch):
     )
 
     assert image_generation_tool.check_fal_api_key() is False
+
+
+# ---------------------------------------------------------------------------
+# Actionable setup message when no FAL backend is reachable.
+# Regression for the silent-drop UX gap described in issue #2543.
+# ---------------------------------------------------------------------------
+
+
+def test_no_backend_message_mentions_fal_signup_and_plugins(monkeypatch):
+    from tools import image_generation_tool
+
+    monkeypatch.setattr(
+        image_generation_tool, "managed_nous_tools_enabled", lambda: False
+    )
+
+    msg = image_generation_tool._build_no_backend_setup_message()
+
+    assert "FAL_KEY" in msg
+    assert "https://fal.ai" in msg
+    # Plugin pointer so users on a stale image_gen.provider know where to look.
+    assert "hermes tools" in msg or "hermes plugins" in msg
+
+
+def test_no_backend_message_mentions_managed_gateway_when_enabled(monkeypatch):
+    from tools import image_generation_tool
+
+    monkeypatch.setattr(
+        image_generation_tool, "managed_nous_tools_enabled", lambda: True
+    )
+
+    msg = image_generation_tool._build_no_backend_setup_message()
+
+    assert "managed FAL gateway" in msg
+    assert "Nous account" in msg or "hermes setup" in msg
+
+
+def test_image_generate_tool_returns_actionable_error_when_no_backend(monkeypatch):
+    """End-to-end: handler must surface the actionable message, not a bare string."""
+    import json
+
+    from tools import image_generation_tool
+
+    monkeypatch.setattr(
+        image_generation_tool, "fal_key_is_configured", lambda: False
+    )
+    monkeypatch.setattr(
+        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
+    )
+    monkeypatch.setattr(
+        image_generation_tool, "managed_nous_tools_enabled", lambda: False
+    )
+
+    result = json.loads(
+        image_generation_tool.image_generate_tool(prompt="a cat")
+    )
+
+    assert result["success"] is False
+    assert "https://fal.ai" in result["error"]
+    assert "FAL_KEY" in result["error"]
diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py
index c496166ec98..3d171f093c9 100644
--- a/tools/image_generation_tool.py
+++ b/tools/image_generation_tool.py
@@ -698,10 +698,7 @@ def image_generate_tool(
             raise ValueError("Prompt is required and must be a non-empty string")
 
         if not (fal_key_is_configured() or _resolve_managed_fal_gateway()):
-            message = "FAL_KEY environment variable not set"
-            if managed_nous_tools_enabled():
-                message += " and managed FAL gateway is unavailable"
-            raise ValueError(message)
+            raise ValueError(_build_no_backend_setup_message())
 
         aspect_lc = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip()
         if aspect_lc not in VALID_ASPECT_RATIOS:
@@ -811,6 +808,42 @@ def check_fal_api_key() -> bool:
     return bool(fal_key_is_configured() or _resolve_managed_fal_gateway())
 
 
+def _build_no_backend_setup_message() -> str:
+    """Build an actionable error string when no FAL backend is reachable.
+
+    Used by the in-tree FAL path. Mentions:
+      - FAL_KEY signup link
+      - managed-gateway status (if Nous tools are enabled)
+      - plugin alternative pointer (so users on a stale ``image_gen.provider``
+        know the registry exists and how to inspect it)
+    """
+    lines = ["Image generation is unavailable in this environment.", ""]
+    lines.append("Missing requirements:")
+    if managed_nous_tools_enabled():
+        lines.append(
+            "  - FAL_KEY is not set and the managed FAL gateway is unreachable"
+        )
+    else:
+        lines.append("  - FAL_KEY environment variable is not set")
+    lines.append("")
+    lines.append("To enable image generation, do one of:")
+    lines.append(
+        "  1. Get a free API key at https://fal.ai and set "
+        "FAL_KEY=<your-key> (then restart the session)"
+    )
+    if managed_nous_tools_enabled():
+        lines.append(
+            "  2. Sign in to a Nous account that has the managed FAL "
+            "gateway enabled (`hermes setup`)"
+        )
+    lines.append(
+        "  3. Configure a different image_gen provider via `hermes tools` "
+        "→ Image Generation (run `hermes plugins list` to see installed "
+        "backends)"
+    )
+    return "\n".join(lines)
+
+
 def check_image_generation_requirements() -> bool:
     """True if any image gen backend is available.
 

From 05d9f641c06043a538ba03e3ed008a97403fcc3b Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:34:15 -0700
Subject: [PATCH 018/218] docs(cron): worked recipes for the wakeAgent pre-run
 gate (#26229)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds three pre-run gate recipes to the cron docs:
- file-change gate (stat + mtime + state file)
- external-flag gate (file presence)
- SQL-count gate (user's own database, not state.db)

These are the use cases @iankar8 proposed adding as a parallel
'trigger' subsystem in #2654. The existing `script` + `wakeAgent`
gate already covers all three at $0 — this lands the patterns as
documentation so users can find them, instead of adding a second
gating mechanism to the cron subsystem.
---
 website/docs/user-guide/features/cron.md | 80 ++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md
index c2c67df8a2a..9a14e6dcd1e 100644
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@@ -522,6 +522,86 @@ print(json.dumps({"wakeAgent": True, "context": {"new_issues": latest - prev}}))
 
 When `wakeAgent` is omitted, the default is `true` (wake the agent as usual).
 
+#### Recipes: cheap pre-run gates
+
+The `wakeAgent` gate gives you a $0 way to decide whether a scheduled job should spend any LLM tokens at all. Three patterns cover most use cases.
+
+**File-change gate** — only run when a watched file has new content since the last successful tick. The scheduler records each job's `last_run_at`; compare it against the file's mtime.
+
+```bash
+#!/bin/bash
+# ~/.hermes/scripts/feed-changed.sh
+FEED="$HOME/data/feed.json"
+STATE="$HOME/.hermes/scripts/.feed-changed.last"
+test -f "$FEED" || { echo '{"wakeAgent": false}'; exit 0; }
+mtime=$(stat -c %Y "$FEED")
+last=$(cat "$STATE" 2>/dev/null || echo 0)
+if [ "$mtime" -le "$last" ]; then
+  echo '{"wakeAgent": false}'
+else
+  echo "$mtime" > "$STATE"
+  echo '{"wakeAgent": true}'
+fi
+```
+
+```text
+cronjob(action="create", name="process-feed",
+        schedule="every 30m",
+        script="feed-changed.sh",
+        prompt="A new ~/data/feed.json has landed. Summarize what changed.")
+```
+
+**External-flag gate** — only run when some other process has signalled readiness (e.g. a deploy hook drops a file, a CI job sets a value in your state store).
+
+```bash
+#!/bin/bash
+# ~/.hermes/scripts/flag-ready.sh
+if test -f /tmp/new-data-ready; then
+  rm -f /tmp/new-data-ready
+  echo '{"wakeAgent": true}'
+else
+  echo '{"wakeAgent": false}'
+fi
+```
+
+```text
+cronjob(action="create", name="nightly-analysis",
+        schedule="0 9 * * *",
+        script="flag-ready.sh",
+        prompt="Run the nightly analysis over today's batch.")
+```
+
+**SQL-count gate** — only run when there are new rows to process in your own database. The script can also pass the count through to the agent via `context`, so the agent knows how much it's looking at without re-querying.
+
+```python
+#!/usr/bin/env python
+# ~/.hermes/scripts/new-rows.py
+import json, sqlite3
+conn = sqlite3.connect("/home/me/data/app.db")
+n = conn.execute(
+    "SELECT COUNT(*) FROM messages WHERE ts > strftime('%s','now','-2 hours')"
+).fetchone()[0]
+if n < 1:
+    print(json.dumps({"wakeAgent": False}))
+else:
+    print(json.dumps({"wakeAgent": True, "context": {"new_rows": n}}))
+```
+
+```text
+cronjob(action="create", name="summarize-new-msgs",
+        schedule="every 2h",
+        script="new-rows.py",
+        prompt="Summarize the new messages from the last 2 hours.")
+```
+
+The same pattern works for any data source you can query from a script — Postgres, an HTTP API, your own state store — without baking a SQL evaluator into the cron subsystem.
+
+:::tip
+Hermes's own `~/.hermes/state.db` is an internal schema that changes between releases. Don't query it from a pre-run gate — point at your own database or feed instead.
+:::
+
+Credit: this recipe set was prompted by @iankar8's exploration in [#2654](https://github.com/NousResearch/hermes-agent/pull/2654), which proposed adding sql/file/command triggers as a parallel mechanism. The `script` + `wakeAgent` gate already covers all three cases at $0, so the work landed as documentation instead.
+
 ### Chaining jobs: `context_from`
 
 A cron job can consume the most recent successful output of one or more other jobs by listing their names (or IDs) in `context_from`:

From 6682f91b80bab57c65435ae6b5cdc791334ed620 Mon Sep 17 00:00:00 2001
From: buntingszn <108427749+buntingszn@users.noreply.github.com>
Date: Fri, 15 May 2026 01:33:12 -0700
Subject: [PATCH 019/218] feat(cron): support name-based lookup for job
 operations

Cron mutation operations (run/pause/resume/remove) and 'hermes cron edit'
now accept a job name in addition to the hex ID, with case-insensitive
matching. Before this, 'hermes cron run my_job_name' died with
'Job with ID my_job_name not found' and forced the user to look up the
hex ID first.

The original PR matched by name but silently picked the first match when
two jobs shared a name. This version refuses to act on an ambiguous name
and surfaces every matching job (id, name, schedule, next_run_at) so the
caller can pick a specific ID.

- cron/jobs.py:
  - get_job() stays ID-only (preserves existing call-site semantics for
    web_server/api_server/curator/scheduler/test code that always passes
    real IDs).
  - resolve_job_ref() is the new name-or-ID resolver, used by pause/
    resume/trigger/remove_job. Exact ID match wins over a name match
    even if a different job's name happens to equal that ID. Ambiguous
    name match raises AmbiguousJobReference with all candidate IDs.
- tools/cronjob_tools.py: dispatch site uses resolve_job_ref, surfaces
  ambiguous matches as a structured error with the matching IDs.
- hermes_cli/cron.py: 'cron edit' uses resolve_job_ref so editing by
  name works and ambiguous names are reported with IDs.
- tests/cron/test_jobs.py: new TestResolveJobRef covering ID match,
  case-insensitive name match, ID-wins-over-name, ambiguous refusal,
  and that pause/resume/trigger/remove all refuse on ambiguity.

Closes #2627
---
 cron/jobs.py            | 67 +++++++++++++++++++++++++------
 hermes_cli/cron.py      | 10 ++++-
 tests/cron/test_jobs.py | 87 +++++++++++++++++++++++++++++++++++++++++
 tools/cronjob_tools.py  | 28 +++++++++++--
 4 files changed, 176 insertions(+), 16 deletions(-)

diff --git a/cron/jobs.py b/cron/jobs.py
index 6b3bc0e66f9..c5da32d44d5 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -645,6 +645,44 @@ def get_job(job_id: str) -> Optional[Dict[str, Any]]:
     return None
 
 
+class AmbiguousJobReference(LookupError):
+    """Raised when a job name matches more than one job."""
+
+    def __init__(self, ref: str, matches: List[Dict[str, Any]]):
+        self.ref = ref
+        self.matches = matches
+        ids = ", ".join(m["id"] for m in matches)
+        super().__init__(
+            f"Job name '{ref}' is ambiguous — matches {len(matches)} jobs: {ids}. "
+            f"Use the job ID instead."
+        )
+
+
+def resolve_job_ref(ref: str) -> Optional[Dict[str, Any]]:
+    """Resolve a job reference (ID or name) to a job record.
+
+    - Exact ID match wins (works even if a different job's name equals this ID).
+    - Otherwise, case-insensitive name match.
+    - If a name matches more than one job, raises AmbiguousJobReference so the
+      caller can surface the matching IDs rather than silently picking one.
+    """
+    if not ref:
+        return None
+    jobs = load_jobs()
+    for job in jobs:
+        if job["id"] == ref:
+            return _normalize_job_record(job)
+    ref_lower = ref.lower()
+    name_matches = [j for j in jobs if (j.get("name") or "").lower() == ref_lower]
+    if not name_matches:
+        return None
+    if len(name_matches) > 1:
+        raise AmbiguousJobReference(
+            ref, [_normalize_job_record(j) for j in name_matches]
+        )
+    return _normalize_job_record(name_matches[0])
+
+
 def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]:
     """List all jobs, optionally including disabled ones."""
     jobs = [_normalize_job_record(j) for j in load_jobs()]
@@ -702,9 +740,12 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
 
 
 def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, Any]]:
-    """Pause a job without deleting it."""
+    """Pause a job without deleting it. Accepts a job ID or name."""
+    job = resolve_job_ref(job_id)
+    if not job:
+        return None
     return update_job(
-        job_id,
+        job["id"],
         {
             "enabled": False,
             "state": "paused",
@@ -715,14 +756,14 @@ def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, A
 
 
 def resume_job(job_id: str) -> Optional[Dict[str, Any]]:
-    """Resume a paused job and compute the next future run from now."""
-    job = get_job(job_id)
+    """Resume a paused job and compute the next future run from now. Accepts a job ID or name."""
+    job = resolve_job_ref(job_id)
     if not job:
         return None
 
     next_run_at = compute_next_run(job["schedule"])
     return update_job(
-        job_id,
+        job["id"],
         {
             "enabled": True,
             "state": "scheduled",
@@ -734,12 +775,12 @@ def resume_job(job_id: str) -> Optional[Dict[str, Any]]:
 
 
 def trigger_job(job_id: str) -> Optional[Dict[str, Any]]:
-    """Schedule a job to run on the next scheduler tick."""
-    job = get_job(job_id)
+    """Schedule a job to run on the next scheduler tick. Accepts a job ID or name."""
+    job = resolve_job_ref(job_id)
     if not job:
         return None
     return update_job(
-        job_id,
+        job["id"],
         {
             "enabled": True,
             "state": "scheduled",
@@ -751,14 +792,18 @@ def trigger_job(job_id: str) -> Optional[Dict[str, Any]]:
 
 
 def remove_job(job_id: str) -> bool:
-    """Remove a job by ID."""
+    """Remove a job by ID or name."""
+    job = resolve_job_ref(job_id)
+    if not job:
+        return False
+    canonical_id = job["id"]
     jobs = load_jobs()
     original_len = len(jobs)
-    jobs = [j for j in jobs if j["id"] != job_id]
+    jobs = [j for j in jobs if j["id"] != canonical_id]
     if len(jobs) < original_len:
         save_jobs(jobs)
         # Clean up output directory to prevent orphaned dirs accumulating
-        job_output_dir = OUTPUT_DIR / job_id
+        job_output_dir = OUTPUT_DIR / canonical_id
         if job_output_dir.exists():
             shutil.rmtree(job_output_dir)
         return True
diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py
index adf4f0c0927..7bff9c6b87b 100644
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@@ -196,9 +196,15 @@ def cron_create(args):
 
 
 def cron_edit(args):
-    from cron.jobs import get_job
+    from cron.jobs import AmbiguousJobReference, resolve_job_ref
 
-    job = get_job(args.job_id)
+    try:
+        job = resolve_job_ref(args.job_id)
+    except AmbiguousJobReference as exc:
+        print(color(str(exc), Colors.RED))
+        for m in exc.matches:
+            print(f"  {m['id']}  (name: {m.get('name')!r})")
+        return 1
     if not job:
         print(color(f"Job not found: {args.job_id}", Colors.RED))
         return 1
diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py
index af42ca444b2..16c56cd6220 100644
--- a/tests/cron/test_jobs.py
+++ b/tests/cron/test_jobs.py
@@ -321,6 +321,93 @@ class TestPauseResumeJob:
         assert resumed["paused_reason"] is None
 
 
+class TestResolveJobRef:
+    """Name-based job lookup for CLI/tool callers (PR #2627, @buntingszn)."""
+
+    def test_resolve_by_exact_id(self, tmp_cron_dir):
+        from cron.jobs import resolve_job_ref
+
+        job = create_job(prompt="A", schedule="1h", name="alpha")
+        assert resolve_job_ref(job["id"])["id"] == job["id"]
+
+    def test_resolve_by_name(self, tmp_cron_dir):
+        from cron.jobs import resolve_job_ref
+
+        job = create_job(prompt="A", schedule="1h", name="alpha")
+        assert resolve_job_ref("alpha")["id"] == job["id"]
+
+    def test_resolve_by_name_case_insensitive(self, tmp_cron_dir):
+        from cron.jobs import resolve_job_ref
+
+        job = create_job(prompt="A", schedule="1h", name="MyJob")
+        assert resolve_job_ref("myjob")["id"] == job["id"]
+        assert resolve_job_ref("MYJOB")["id"] == job["id"]
+
+    def test_resolve_returns_none_when_not_found(self, tmp_cron_dir):
+        from cron.jobs import resolve_job_ref
+
+        create_job(prompt="A", schedule="1h", name="alpha")
+        assert resolve_job_ref("does-not-exist") is None
+        assert resolve_job_ref("") is None
+
+    def test_resolve_id_wins_over_name(self, tmp_cron_dir):
+        """If a job's name happens to equal another job's ID, ID match wins."""
+        from cron.jobs import resolve_job_ref
+
+        j1 = create_job(prompt="A", schedule="1h")
+        # Create a second job whose name is j1's ID
+        j2 = create_job(prompt="B", schedule="1h", name=j1["id"])
+        # Looking up j1["id"] must return j1, not the colliding-name job j2
+        assert resolve_job_ref(j1["id"])["id"] == j1["id"]
+        assert resolve_job_ref(j1["id"])["id"] != j2["id"]
+
+    def test_resolve_ambiguous_name_raises(self, tmp_cron_dir):
+        """Two jobs sharing a name → refuse to pick, surface both IDs."""
+        from cron.jobs import AmbiguousJobReference, resolve_job_ref
+
+        j1 = create_job(prompt="A", schedule="1h", name="dup")
+        j2 = create_job(prompt="B", schedule="1h", name="dup")
+        with pytest.raises(AmbiguousJobReference) as exc_info:
+            resolve_job_ref("dup")
+        ids = {m["id"] for m in exc_info.value.matches}
+        assert ids == {j1["id"], j2["id"]}
+        # Error message mentions both IDs so the user can pick one
+        assert j1["id"] in str(exc_info.value)
+        assert j2["id"] in str(exc_info.value)
+
+    def test_trigger_by_name(self, tmp_cron_dir):
+        from cron.jobs import trigger_job
+
+        job = create_job(prompt="A", schedule="1h", name="alpha")
+        result = trigger_job("alpha")
+        assert result is not None
+        assert result["id"] == job["id"]
+
+    def test_pause_by_name(self, tmp_cron_dir):
+        job = create_job(prompt="A", schedule="1h", name="alpha")
+        result = pause_job("alpha", reason="manual")
+        assert result is not None
+        assert result["id"] == job["id"]
+        assert result["state"] == "paused"
+
+    def test_remove_by_name(self, tmp_cron_dir):
+        job = create_job(prompt="A", schedule="1h", name="alpha")
+        assert remove_job("alpha") is True
+        assert get_job(job["id"]) is None
+
+    def test_mutations_refuse_ambiguous_name(self, tmp_cron_dir):
+        """pause/resume/trigger/remove must refuse to act on an ambiguous name."""
+        from cron.jobs import AmbiguousJobReference, trigger_job
+
+        create_job(prompt="A", schedule="1h", name="dup")
+        create_job(prompt="B", schedule="1h", name="dup")
+        for fn in (pause_job, resume_job, trigger_job):
+            with pytest.raises(AmbiguousJobReference):
+                fn("dup")
+        with pytest.raises(AmbiguousJobReference):
+            remove_job("dup")
+
+
 class TestMarkJobRun:
     def test_increments_completed(self, tmp_cron_dir):
         job = create_job(prompt="Test", schedule="every 1h")
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index e63b60047ac..3c29431484d 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -21,12 +21,14 @@ logger = logging.getLogger(__name__)
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
 from cron.jobs import (
+    AmbiguousJobReference,
     create_job,
     get_job,
     list_jobs,
     parse_schedule,
     pause_job,
     remove_job,
+    resolve_job_ref,
     resume_job,
     trigger_job,
     update_job,
@@ -393,12 +395,32 @@ def cronjob(
         if not job_id:
             return tool_error(f"job_id is required for action '{normalized}'", success=False)
 
-        job = get_job(job_id)
-        if not job:
+        try:
+            job = resolve_job_ref(job_id)
+        except AmbiguousJobReference as exc:
             return json.dumps(
-                {"success": False, "error": f"Job with ID '{job_id}' not found. Use cronjob(action='list') to inspect jobs."},
+                {
+                    "success": False,
+                    "error": str(exc),
+                    "matches": [
+                        {
+                            "id": m["id"],
+                            "name": m.get("name"),
+                            "schedule": m.get("schedule_display"),
+                            "next_run_at": m.get("next_run_at"),
+                        }
+                        for m in exc.matches
+                    ],
+                },
                 indent=2,
             )
+        if not job:
+            return json.dumps(
+                {"success": False, "error": f"Job with ID or name '{job_id}' not found. Use cronjob(action='list') to inspect jobs."},
+                indent=2,
+            )
+        # Resolve to canonical ID (supports name-based lookup)
+        job_id = job["id"]
 
         if normalized == "remove":
             removed = remove_job(job_id)

From 9f57f2286d9fb52419c69ea64c3119f734b35ef1 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:33:16 -0700
Subject: [PATCH 020/218] chore(release): add AUTHOR_MAP entry for buntingszn

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 10d67f3e708..b0e1fda9686 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -64,6 +64,7 @@ AUTHOR_MAP = {
     "mr@shu.io": "mrshu",
     "adam.manning@gmail.com": "am423",
     "buraysandro9@gmail.com": "ygd58",
+    "108427749+buntingszn@users.noreply.github.com": "buntingszn",
     "yanglongwei06@gmail.com": "Alex-yang00",
     "teknium@nousresearch.com": "teknium1",
     "piyushvp1@gmail.com": "thelumiereguy",

From 85782a4ed7f2329957c4af9a4243acb51c3cf921 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:36:54 -0700
Subject: [PATCH 021/218] feat(acp): hermes acp --setup-browser bootstraps
 browser tools for registry installs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Zed ACP Registry path (uvx --from 'hermes-agent[acp]==X' hermes-acp)
gets a Python-only install. Browser tools depend on the agent-browser npm
package + Chromium, neither of which are in the wheel. Without an
explicit bootstrap, registry users have no path to working browser tools.

Ship a bundled, idempotent bootstrap script (Linux/macOS bash + Windows
PowerShell) inside acp_adapter/bootstrap/ as wheel package-data. New
entry points:

  hermes acp --setup-browser        # interactive; prompts before Chromium download
  hermes acp --setup-browser --yes  # non-interactive
  hermes-acp --setup-browser

The terminal-auth flow (hermes acp --setup) also offers the browser
bootstrap as a follow-up after model selection, so first-run registry
users get the option without knowing the flag exists.

Key design choices:
- npm install -g --prefix $NODE_PREFIX so we never need sudo. System Node
  on PATH is respected; only the install target is redirected to the
  user-writable Hermes-managed Node prefix.
- tools/browser_tool.py::_browser_candidate_path_dirs() already walks
  $HERMES_HOME/node/bin, so installed binaries are discovered with no
  agent-side code change.
- System Chrome/Chromium detection short-circuits the ~400 MB Playwright
  download when a suitable browser already exists.
- Bash + PowerShell live as ONE copy each under acp_adapter/bootstrap/.
  Not duplicated under scripts/. install.sh and install.ps1 keep their
  inline browser blocks for the source-checkout path.

E2E validated end-to-end:
  bash bootstrap_browser_tools.sh --skip-chromium
    → installs agent-browser into ~/.hermes/node/bin/
  tools.browser_tool._find_agent_browser()
    → returns the installed path
  check_browser_requirements()
    → returns True (browser tools register)

Tests:
- tests/acp/test_entry.py: 11 tests covering --setup-browser dispatch
  (linux + windows + --yes forwarding + failure propagation), the
  terminal-auth follow-up prompt path, and a package-data wheel-shipping
  assertion that catches any future pyproject.toml regression.

Docs: website/docs/user-guide/features/acp.md gains a 'Browser tools
(optional)' subsection with the two-line install + what-it-does.
---
 acp_adapter/bootstrap/__init__.py             |   0
 .../bootstrap/bootstrap_browser_tools.ps1     | 288 +++++++++++++
 .../bootstrap/bootstrap_browser_tools.sh      | 399 ++++++++++++++++++
 acp_adapter/entry.py                          |  88 ++++
 hermes_cli/main.py                            |  18 +
 pyproject.toml                                |   3 +-
 tests/acp/test_entry.py                       | 147 ++++++-
 website/docs/user-guide/features/acp.md       |  21 +
 8 files changed, 961 insertions(+), 3 deletions(-)
 create mode 100644 acp_adapter/bootstrap/__init__.py
 create mode 100644 acp_adapter/bootstrap/bootstrap_browser_tools.ps1
 create mode 100755 acp_adapter/bootstrap/bootstrap_browser_tools.sh

diff --git a/acp_adapter/bootstrap/__init__.py b/acp_adapter/bootstrap/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/acp_adapter/bootstrap/bootstrap_browser_tools.ps1 b/acp_adapter/bootstrap/bootstrap_browser_tools.ps1
new file mode 100644
index 00000000000..f840fd2d559
--- /dev/null
+++ b/acp_adapter/bootstrap/bootstrap_browser_tools.ps1
@@ -0,0 +1,288 @@
+# bootstrap_browser_tools.ps1 — install agent-browser + Playwright Chromium
+# into ~/.hermes/node/ for use by Hermes Agent's browser tools on Windows.
+#
+# Targets the registry-install path: users who got Hermes via
+# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone,
+# so the install.ps1 `npm install`-in-repo flow doesn't apply. This script
+# is a self-contained, idempotent slice of install.ps1's browser block.
+#
+# Usage:
+#   .\bootstrap_browser_tools.ps1                # use defaults
+#   .\bootstrap_browser_tools.ps1 -Yes           # accept Chromium download
+#   .\bootstrap_browser_tools.ps1 -SkipChromium  # Node + agent-browser only
+#
+# Idempotent: re-running this is safe and fast.
+
+[CmdletBinding()]
+param(
+    [switch]$Yes,
+    [switch]$SkipChromium
+)
+
+$ErrorActionPreference = "Stop"
+$NodeVersion = "22"
+
+# ─────────────────────────────────────────────────────────────────────────
+# Logging
+# ─────────────────────────────────────────────────────────────────────────
+
+function Write-Info    { param([string]$msg) Write-Host "[*] $msg" -ForegroundColor Cyan    }
+function Write-Success { param([string]$msg) Write-Host "[+] $msg" -ForegroundColor Green   }
+function Write-Warn    { param([string]$msg) Write-Host "[!] $msg" -ForegroundColor Yellow  }
+function Write-Err     { param([string]$msg) Write-Host "[x] $msg" -ForegroundColor Red     }
+
+# ─────────────────────────────────────────────────────────────────────────
+# Paths
+# ─────────────────────────────────────────────────────────────────────────
+
+$HermesHome = $env:HERMES_HOME
+if (-not $HermesHome) {
+    $HermesHome = Join-Path $env:USERPROFILE ".hermes"
+}
+$NodePrefix = Join-Path $HermesHome "node"
+
+# ─────────────────────────────────────────────────────────────────────────
+# Step 1: Node.js
+# ─────────────────────────────────────────────────────────────────────────
+
+function Resolve-NpmExe {
+    # Same gotcha as install.ps1: prefer npm.cmd over npm.ps1 so the
+    # PowerShell execution policy doesn't block us.
+    $cmd = Get-Command npm -ErrorAction SilentlyContinue
+    if (-not $cmd) { return $null }
+    $npmExe = $cmd.Source
+    if ($npmExe -like "*.ps1") {
+        $sibling = Join-Path (Split-Path $npmExe -Parent) "npm.cmd"
+        if (Test-Path $sibling) { return $sibling }
+    }
+    return $npmExe
+}
+
+function Resolve-NpxExe {
+    $cmd = Get-Command npx -ErrorAction SilentlyContinue
+    if (-not $cmd) { return $null }
+    $npxExe = $cmd.Source
+    if ($npxExe -like "*.ps1") {
+        $sibling = Join-Path (Split-Path $npxExe -Parent) "npx.cmd"
+        if (Test-Path $sibling) { return $sibling }
+    }
+    return $npxExe
+}
+
+function Ensure-Node {
+    # System Node on PATH?
+    $sysNode = Get-Command node -ErrorAction SilentlyContinue
+    if ($sysNode) {
+        try {
+            $v = & $sysNode.Source --version
+            $major = [int]($v -replace '^v(\d+).*', '$1')
+            if ($major -ge 20) {
+                Write-Success "Node.js $v found on PATH"
+                return
+            }
+            Write-Warn "Node.js $v is older than v20 — installing managed Node."
+        } catch {
+            Write-Warn "Failed to query Node version: $_"
+        }
+    }
+
+    # Hermes-managed Node?
+    $managedNode = Join-Path $NodePrefix "node.exe"
+    if (Test-Path $managedNode) {
+        $v = & $managedNode --version
+        Write-Success "Node.js $v found (Hermes-managed at $NodePrefix)"
+        # Prepend to current-process PATH so subsequent npm/npx calls find it.
+        $env:PATH = "$NodePrefix;$env:PATH"
+        return
+    }
+
+    Write-Info "Installing Node.js $NodeVersion LTS into $NodePrefix ..."
+
+    $arch = if ([Environment]::Is64BitOperatingSystem) { "x64" } else { "x86" }
+    $indexUrl = "https://nodejs.org/dist/latest-v${NodeVersion}.x/"
+
+    try {
+        $indexPage = Invoke-WebRequest -Uri $indexUrl -UseBasicParsing
+        $matches = [regex]::Matches($indexPage.Content, "node-v${NodeVersion}\.\d+\.\d+-win-${arch}\.zip")
+        if ($matches.Count -eq 0) {
+            Write-Err "Could not locate Node.js $NodeVersion zip for win-$arch"
+            throw "no tarball"
+        }
+        $zipName = $matches[0].Value
+        $zipUrl = "$indexUrl$zipName"
+
+        $tmpDir = Join-Path $env:TEMP "hermes-node-$([guid]::NewGuid().ToString('N'))"
+        New-Item -ItemType Directory -Force -Path $tmpDir | Out-Null
+        $zipPath = Join-Path $tmpDir $zipName
+
+        Write-Info "Downloading $zipName ..."
+        Invoke-WebRequest -Uri $zipUrl -OutFile $zipPath -UseBasicParsing
+
+        Expand-Archive -Path $zipPath -DestinationPath $tmpDir -Force
+        $extracted = Get-ChildItem -Path $tmpDir -Directory | Where-Object { $_.Name -like "node-v*" } | Select-Object -First 1
+
+        if (-not $extracted) { Write-Err "Node.js extraction failed"; throw "extract" }
+
+        if (Test-Path $NodePrefix) { Remove-Item -Recurse -Force $NodePrefix }
+        New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null
+        Move-Item -Path $extracted.FullName -Destination $NodePrefix
+
+        Remove-Item -Recurse -Force $tmpDir -ErrorAction SilentlyContinue
+
+        $env:PATH = "$NodePrefix;$env:PATH"
+        $v = & "$NodePrefix\node.exe" --version
+        Write-Success "Node.js $v installed to $NodePrefix"
+    } catch {
+        Write-Err "Node.js install failed: $_"
+        Write-Info "Install Node 20+ manually from https://nodejs.org/en/download/ and re-run."
+        throw
+    }
+}
+
+# ─────────────────────────────────────────────────────────────────────────
+# Step 2: agent-browser
+# ─────────────────────────────────────────────────────────────────────────
+
+function Ensure-AgentBrowser {
+    $npmExe = Resolve-NpmExe
+    if (-not $npmExe) {
+        Write-Err "npm not on PATH after Node install — aborting"
+        throw "npm missing"
+    }
+
+    # Already installed?
+    $existing = Get-Command agent-browser -ErrorAction SilentlyContinue
+    if ($existing) {
+        Write-Success "agent-browser already installed at $($existing.Source)"
+        return
+    }
+
+    # When the user has system Node (winget / installer-based), `npm install
+    # -g` writes to a directory that may require admin rights. Force the
+    # prefix to the user-writable Hermes-managed Node directory so we never
+    # need elevation and the agent can always find the result. Mirrors the
+    # bash bootstrap's `--prefix $NODE_PREFIX` strategy.
+    New-Item -ItemType Directory -Force -Path $NodePrefix | Out-Null
+
+    Write-Info "Installing agent-browser (npm, prefix=$NodePrefix)..."
+    & $npmExe install -g --prefix $NodePrefix --silent `
+        "agent-browser@^0.26.0" "@askjo/camofox-browser@^1.5.2"
+    if ($LASTEXITCODE -ne 0) {
+        Write-Err "npm install -g agent-browser failed (exit $LASTEXITCODE)"
+        throw "npm install"
+    }
+
+    # Windows npm global installs drop shims at $NodePrefix\ root (not bin/).
+    # Prepend to PATH so any subsequent npx call resolves them.
+    $env:PATH = "$NodePrefix;$env:PATH"
+
+    Write-Success "agent-browser installed to $NodePrefix"
+}
+
+# ─────────────────────────────────────────────────────────────────────────
+# Step 3: Playwright Chromium
+# ─────────────────────────────────────────────────────────────────────────
+
+function Find-SystemBrowser {
+    $candidates = @(
+        "C:\Program Files\Google\Chrome\Application\chrome.exe",
+        "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
+        "C:\Program Files\Chromium\Application\chromium.exe",
+        "${env:LOCALAPPDATA}\Google\Chrome\Application\chrome.exe",
+        "${env:LOCALAPPDATA}\Chromium\Application\chromium.exe"
+    )
+    foreach ($p in $candidates) {
+        if (Test-Path $p) { return $p }
+    }
+    # Edge — Chromium-based, agent-browser can use it
+    foreach ($p in @(
+        "C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
+        "C:\Program Files\Microsoft\Edge\Application\msedge.exe"
+    )) {
+        if (Test-Path $p) { return $p }
+    }
+    return $null
+}
+
+function Write-BrowserEnv {
+    param([string]$BrowserPath)
+    $envFile = Join-Path $HermesHome ".env"
+    New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null
+    if (Test-Path $envFile) {
+        $existing = Get-Content $envFile -Raw -ErrorAction SilentlyContinue
+        if ($existing -and ($existing -match "(?m)^AGENT_BROWSER_EXECUTABLE_PATH=")) {
+            return
+        }
+    }
+    Add-Content -Path $envFile -Value ""
+    Add-Content -Path $envFile -Value "# Hermes Agent browser tools — use the system Chrome/Chromium/Edge binary."
+    Add-Content -Path $envFile -Value "AGENT_BROWSER_EXECUTABLE_PATH=$BrowserPath"
+    Write-Success "Configured browser tools to use $BrowserPath"
+}
+
+function Confirm-ChromiumDownload {
+    if ($Yes) { return $true }
+    if (-not [Environment]::UserInteractive) {
+        Write-Warn "Non-interactive shell — skipping Chromium prompt."
+        Write-Info "Re-run with -Yes to install Chromium (~400 MB download)."
+        return $false
+    }
+    $reply = Read-Host "Install Playwright Chromium (~400 MB download)? [y/N]"
+    return ($reply -match "^(y|yes)$")
+}
+
+function Ensure-Chromium {
+    if ($SkipChromium) {
+        Write-Info "Skipping Chromium install (-SkipChromium)"
+        return
+    }
+
+    # agent-browser on Windows expects a Playwright-managed Chromium under
+    # %LOCALAPPDATA%\ms-playwright. The system-browser shortcut from the
+    # Linux/macOS path doesn't apply the same way on Windows — Playwright's
+    # default launch path won't pick up a stock Chrome install without an
+    # explicit AGENT_BROWSER_EXECUTABLE_PATH. We still offer it as a
+    # fallback when the user doesn't want the download.
+
+    if (-not (Confirm-ChromiumDownload)) {
+        $sys = Find-SystemBrowser
+        if ($sys) {
+            Write-Info "Using system browser at $sys (Chromium download skipped)."
+            Write-BrowserEnv -BrowserPath $sys
+        } else {
+            Write-Info "Chromium install skipped. Browser tools won't launch until"
+            Write-Info "Chromium is installed or AGENT_BROWSER_EXECUTABLE_PATH is set."
+        }
+        return
+    }
+
+    $npxExe = Resolve-NpxExe
+    if (-not $npxExe) {
+        Write-Err "npx not on PATH — cannot install Playwright Chromium"
+        throw "npx missing"
+    }
+
+    Write-Info "Installing Playwright Chromium (~400 MB) ..."
+    & $npxExe --yes playwright install chromium
+    if ($LASTEXITCODE -ne 0) {
+        Write-Err "Playwright Chromium install failed (exit $LASTEXITCODE)"
+        Write-Info "Try again later: npx --yes playwright install chromium"
+        throw "playwright"
+    }
+    Write-Success "Playwright Chromium installed"
+}
+
+# ─────────────────────────────────────────────────────────────────────────
+# Main
+# ─────────────────────────────────────────────────────────────────────────
+
+Write-Info "Hermes Agent: bootstrapping browser tools"
+Write-Info "  HERMES_HOME = $HermesHome"
+Write-Info "  OS          = Windows"
+
+Ensure-Node
+Ensure-AgentBrowser
+Ensure-Chromium
+
+Write-Success "Browser tools setup complete."
+Write-Info "Hermes Agent will pick up agent-browser from $NodePrefix on next launch."
diff --git a/acp_adapter/bootstrap/bootstrap_browser_tools.sh b/acp_adapter/bootstrap/bootstrap_browser_tools.sh
new file mode 100755
index 00000000000..9981069a6af
--- /dev/null
+++ b/acp_adapter/bootstrap/bootstrap_browser_tools.sh
@@ -0,0 +1,399 @@
+#!/usr/bin/env bash
+#
+# bootstrap_browser_tools.sh — install agent-browser + Playwright Chromium
+# into ~/.hermes/node/ for use by Hermes Agent's browser tools.
+#
+# Targets the registry-install path: users who got Hermes via
+# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone,
+# so the install.sh `npm install`-in-repo flow doesn't apply. This script
+# is a self-contained, idempotent slice of install.sh's browser block —
+# safe to run from `hermes-acp --setup-browser`, from a fresh terminal,
+# or from install.sh itself (it's a no-op when everything is already in place).
+#
+# Usage:
+#   bootstrap_browser_tools.sh           # use defaults
+#   bootstrap_browser_tools.sh --yes     # accept the ~400MB Chromium download
+#   bootstrap_browser_tools.sh --skip-chromium    # only install Node + agent-browser
+#   HERMES_HOME=/custom/path bootstrap_browser_tools.sh
+#
+# Idempotent: re-running this is safe and fast. Each step checks whether
+# the work is already done.
+
+set -euo pipefail
+
+# ─────────────────────────────────────────────────────────────────────────
+# Config
+# ─────────────────────────────────────────────────────────────────────────
+
+NODE_VERSION="22"
+HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
+NODE_PREFIX="$HERMES_HOME/node"
+
+SKIP_CHROMIUM=false
+ASSUME_YES=false
+
+# ─────────────────────────────────────────────────────────────────────────
+# Logging
+# ─────────────────────────────────────────────────────────────────────────
+
+if [ -t 1 ]; then
+    C_GREEN='\033[0;32m'
+    C_YELLOW='\033[0;33m'
+    C_BLUE='\033[0;34m'
+    C_RED='\033[0;31m'
+    C_RESET='\033[0m'
+else
+    C_GREEN='' ; C_YELLOW='' ; C_BLUE='' ; C_RED='' ; C_RESET=''
+fi
+
+log_info()    { printf "${C_BLUE}[*]${C_RESET} %s\n"  "$*"; }
+log_success() { printf "${C_GREEN}[✓]${C_RESET} %s\n" "$*"; }
+log_warn()    { printf "${C_YELLOW}[!]${C_RESET} %s\n" "$*" >&2; }
+log_error()   { printf "${C_RED}[✗]${C_RESET} %s\n"   "$*" >&2; }
+
+# ─────────────────────────────────────────────────────────────────────────
+# Arg parsing
+# ─────────────────────────────────────────────────────────────────────────
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --skip-chromium) SKIP_CHROMIUM=true ;;
+        --yes|-y)        ASSUME_YES=true ;;
+        -h|--help)
+            cat <<EOF
+Bootstrap Hermes Agent browser tools.
+
+Installs Node.js (into ~/.hermes/node/), the agent-browser npm package,
+and the Playwright Chromium browser engine.
+
+Options:
+  --skip-chromium   Install Node + agent-browser but skip Chromium download
+  --yes, -y         Accept the ~400 MB Chromium download without prompting
+  -h, --help        Show this help
+
+Environment:
+  HERMES_HOME       Override Hermes data dir (default: \$HOME/.hermes)
+EOF
+            exit 0
+            ;;
+        *)
+            log_error "Unknown option: $1"
+            exit 2
+            ;;
+    esac
+    shift
+done
+
+# ─────────────────────────────────────────────────────────────────────────
+# OS / arch detection
+# ─────────────────────────────────────────────────────────────────────────
+
+OS="unknown"
+case "$(uname -s)" in
+    Linux*)  OS="linux"  ;;
+    Darwin*) OS="macos"  ;;
+    *)
+        log_error "Unsupported OS: $(uname -s)"
+        log_info "Windows users: run scripts/bootstrap_browser_tools.ps1 in PowerShell."
+        exit 1
+        ;;
+esac
+
+NODE_ARCH=""
+case "$(uname -m)" in
+    x86_64)         NODE_ARCH="x64"    ;;
+    aarch64|arm64)  NODE_ARCH="arm64"  ;;
+    armv7l)         NODE_ARCH="armv7l" ;;
+    *)
+        log_error "Unsupported architecture: $(uname -m)"
+        exit 1
+        ;;
+esac
+
+NODE_OS=""
+case "$OS" in
+    linux) NODE_OS="linux"  ;;
+    macos) NODE_OS="darwin" ;;
+esac
+
+DISTRO=""
+if [ -f /etc/os-release ]; then
+    # shellcheck disable=SC1091
+    . /etc/os-release
+    DISTRO="${ID:-}"
+fi
+
+# ─────────────────────────────────────────────────────────────────────────
+# Step 1: Node.js
+# ─────────────────────────────────────────────────────────────────────────
+
+ensure_node() {
+    # Already on PATH and recent enough?
+    if command -v node >/dev/null 2>&1; then
+        local found_ver major
+        found_ver=$(node --version 2>/dev/null)
+        major=$(echo "$found_ver" | sed -E 's/^v([0-9]+).*/\1/')
+        if [ -n "$major" ] && [ "$major" -ge 20 ]; then
+            log_success "Node.js $found_ver found on PATH"
+            return 0
+        fi
+        log_warn "Node.js $found_ver is older than v20 — installing managed Node."
+    fi
+
+    if [ -x "$NODE_PREFIX/bin/node" ]; then
+        local found_ver
+        found_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?")
+        export PATH="$NODE_PREFIX/bin:$PATH"
+        log_success "Node.js $found_ver found (Hermes-managed at $NODE_PREFIX)"
+        return 0
+    fi
+
+    log_info "Installing Node.js $NODE_VERSION LTS into $NODE_PREFIX ..."
+
+    local index_url="https://nodejs.org/dist/latest-v${NODE_VERSION}.x/"
+    local tarball_name
+    tarball_name=$(curl -fsSL "$index_url" \
+        | grep -oE "node-v${NODE_VERSION}\.[0-9]+\.[0-9]+-${NODE_OS}-${NODE_ARCH}\.tar\.xz" \
+        | head -1)
+
+    if [ -z "$tarball_name" ]; then
+        tarball_name=$(curl -fsSL "$index_url" \
+            | grep -oE "node-v${NODE_VERSION}\.[0-9]+\.[0-9]+-${NODE_OS}-${NODE_ARCH}\.tar\.gz" \
+            | head -1)
+    fi
+
+    if [ -z "$tarball_name" ]; then
+        log_error "Could not locate Node.js $NODE_VERSION tarball for $NODE_OS-$NODE_ARCH"
+        log_info "Install Node 20+ manually: https://nodejs.org/en/download/"
+        return 1
+    fi
+
+    local tmp_dir
+    tmp_dir=$(mktemp -d)
+    trap 'rm -rf "$tmp_dir"' RETURN
+
+    log_info "Downloading $tarball_name ..."
+    if ! curl -fsSL "${index_url}${tarball_name}" -o "$tmp_dir/$tarball_name"; then
+        log_error "Node.js download failed"
+        return 1
+    fi
+
+    if [[ "$tarball_name" == *.tar.xz ]]; then
+        tar xf "$tmp_dir/$tarball_name" -C "$tmp_dir"
+    else
+        tar xzf "$tmp_dir/$tarball_name" -C "$tmp_dir"
+    fi
+
+    local extracted_dir
+    extracted_dir=$(ls -d "$tmp_dir"/node-v* 2>/dev/null | head -1)
+    if [ ! -d "$extracted_dir" ]; then
+        log_error "Node.js extraction failed"
+        return 1
+    fi
+
+    mkdir -p "$HERMES_HOME"
+    rm -rf "$NODE_PREFIX"
+    mv "$extracted_dir" "$NODE_PREFIX"
+
+    export PATH="$NODE_PREFIX/bin:$PATH"
+
+    local installed_ver
+    installed_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?")
+    log_success "Node.js $installed_ver installed to $NODE_PREFIX"
+}
+
+# ─────────────────────────────────────────────────────────────────────────
+# Step 2: agent-browser + @askjo/camofox-browser via global npm install
+# ─────────────────────────────────────────────────────────────────────────
+
+ensure_agent_browser() {
+    if ! command -v npm >/dev/null 2>&1; then
+        log_error "npm not on PATH after Node install — aborting"
+        return 1
+    fi
+
+    # _find_agent_browser() in tools/browser_tool.py walks ~/.hermes/node/bin
+    # plus a few standard prefixes, so installing globally into the managed
+    # Node prefix is enough — no PATH manipulation needed from the agent side.
+    if [ -x "$NODE_PREFIX/bin/agent-browser" ] || command -v agent-browser >/dev/null 2>&1; then
+        log_success "agent-browser already installed"
+        return 0
+    fi
+
+    # When the system's `npm` resolves to a root-owned prefix (e.g.
+    # /usr/lib/node_modules), `npm install -g` fails with EACCES without
+    # sudo. Force the prefix to the user-writable Hermes-managed Node
+    # directory so we never need sudo and the agent can always find the
+    # result. If we installed Node ourselves above, this is a no-op
+    # (managed Node already uses $NODE_PREFIX). If the user has system
+    # Node, we still drop agent-browser under $NODE_PREFIX/bin/ — which
+    # is exactly where _browser_candidate_path_dirs() looks first.
+    mkdir -p "$NODE_PREFIX"
+
+    log_info "Installing agent-browser (npm, prefix=$NODE_PREFIX)..."
+    if ! npm install -g --prefix "$NODE_PREFIX" --silent \
+            agent-browser@^0.26.0 \
+            "@askjo/camofox-browser@^1.5.2"; then
+        log_error "npm install -g agent-browser failed"
+        return 1
+    fi
+
+    # macOS/Linux global installs place the shim into $NODE_PREFIX/bin/.
+    # Add it to PATH for any subsequent steps (npx playwright).
+    export PATH="$NODE_PREFIX/bin:$PATH"
+
+    log_success "agent-browser installed to $NODE_PREFIX/bin/"
+}
+
+# ─────────────────────────────────────────────────────────────────────────
+# Step 3: Playwright Chromium
+# ─────────────────────────────────────────────────────────────────────────
+
+confirm_chromium_download() {
+    if [ "$ASSUME_YES" = true ]; then return 0; fi
+    if [ ! -t 0 ]; then
+        log_warn "Non-interactive shell — skipping Chromium prompt."
+        log_info "Re-run with --yes to install Chromium (~400 MB download)."
+        return 1
+    fi
+    printf "Install Playwright Chromium (~400 MB download)? [y/N] "
+    local reply=""
+    read -r reply || reply=""
+    case "$reply" in
+        y|Y|yes|YES) return 0 ;;
+        *) return 1 ;;
+    esac
+}
+
+# Detect a usable system Chrome/Chromium. agent-browser's Chrome engine can
+# use it instead of downloading Playwright's bundled Chromium, saving the
+# download cost. Returns the path or empty string.
+find_system_browser() {
+    local candidate
+    for candidate in google-chrome google-chrome-stable chromium chromium-browser chrome; do
+        if command -v "$candidate" >/dev/null 2>&1; then
+            command -v "$candidate"
+            return 0
+        fi
+    done
+    # macOS app-bundle locations
+    if [ "$OS" = "macos" ]; then
+        for candidate in \
+            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
+            "/Applications/Chromium.app/Contents/MacOS/Chromium" ; do
+            if [ -x "$candidate" ]; then
+                echo "$candidate"
+                return 0
+            fi
+        done
+    fi
+    return 1
+}
+
+write_browser_env() {
+    local browser_path="$1"
+    local env_file="$HERMES_HOME/.env"
+    mkdir -p "$HERMES_HOME"
+    if [ -f "$env_file" ] && grep -q "^AGENT_BROWSER_EXECUTABLE_PATH=" "$env_file"; then
+        return 0
+    fi
+    {
+        echo ""
+        echo "# Hermes Agent browser tools — use the system Chrome/Chromium binary."
+        echo "AGENT_BROWSER_EXECUTABLE_PATH=$browser_path"
+    } >> "$env_file"
+    log_success "Configured browser tools to use $browser_path"
+}
+
+ensure_chromium() {
+    if [ "$SKIP_CHROMIUM" = true ]; then
+        log_info "Skipping Chromium install (--skip-chromium)"
+        return 0
+    fi
+
+    local system_browser
+    system_browser="$(find_system_browser 2>/dev/null || true)"
+    if [ -n "$system_browser" ]; then
+        log_success "Found system browser: $system_browser"
+        log_info "Skipping Playwright Chromium download; agent-browser will use it."
+        write_browser_env "$system_browser"
+        return 0
+    fi
+
+    if ! confirm_chromium_download; then
+        log_info "Chromium install skipped. Browser tools will only work if you"
+        log_info "set AGENT_BROWSER_EXECUTABLE_PATH or install Chromium later."
+        return 0
+    fi
+
+    if ! command -v npx >/dev/null 2>&1; then
+        log_error "npx not on PATH — cannot install Playwright Chromium"
+        return 1
+    fi
+
+    log_info "Installing Playwright Chromium (~400 MB) ..."
+
+    # On apt-based distros, --with-deps requires sudo. Try non-interactively
+    # only — never prompt — and fall back to the bare browser-only install.
+    local installed=false
+    if [ "$OS" = "linux" ]; then
+        case "$DISTRO" in
+            ubuntu|debian|raspbian|pop|linuxmint|elementary|zorin|kali|parrot)
+                if [ "$(id -u)" -eq 0 ] || (command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null); then
+                    log_info "Installing system deps with --with-deps (sudo available)"
+                    if npx --yes playwright install --with-deps chromium; then
+                        installed=true
+                    fi
+                else
+                    log_warn "sudo not available non-interactively — installing Chromium without system deps."
+                    log_info "If browser tools fail to launch, an administrator should run:"
+                    log_info "  sudo npx playwright install-deps chromium"
+                fi
+                ;;
+            arch|manjaro|cachyos|endeavouros|garuda)
+                log_info "Arch-family system dependencies are not auto-installed."
+                log_info "If launch fails, run: sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
+                ;;
+            fedora|rhel|centos|rocky|alma)
+                log_info "Fedora/RHEL system dependencies are not auto-installed."
+                log_info "If launch fails, run: sudo dnf install nss atk at-spi2-core cups-libs libdrm libxkbcommon mesa-libgbm pango cairo alsa-lib"
+                ;;
+            opensuse*|sles)
+                log_info "openSUSE system dependencies are not auto-installed."
+                ;;
+        esac
+    fi
+
+    if [ "$installed" = false ]; then
+        if npx --yes playwright install chromium; then
+            installed=true
+        fi
+    fi
+
+    if [ "$installed" = true ]; then
+        log_success "Playwright Chromium installed"
+    else
+        log_error "Playwright Chromium install failed"
+        log_info "Try again later: npx --yes playwright install chromium"
+        return 1
+    fi
+}
+
+# ─────────────────────────────────────────────────────────────────────────
+# Main
+# ─────────────────────────────────────────────────────────────────────────
+
+main() {
+    log_info "Hermes Agent: bootstrapping browser tools"
+    log_info "  HERMES_HOME = $HERMES_HOME"
+    log_info "  OS / arch   = $NODE_OS-$NODE_ARCH ${DISTRO:+($DISTRO)}"
+
+    ensure_node
+    ensure_agent_browser
+    ensure_chromium
+
+    log_success "Browser tools setup complete."
+    log_info "Hermes Agent will pick up agent-browser from $NODE_PREFIX/bin/ on next launch."
+}
+
+main
diff --git a/acp_adapter/entry.py b/acp_adapter/entry.py
index 48e677a6522..cf5c2ba9cfb 100644
--- a/acp_adapter/entry.py
+++ b/acp_adapter/entry.py
@@ -124,6 +124,20 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
         action="store_true",
         help="Run interactive Hermes provider/model setup for ACP terminal auth",
     )
+    parser.add_argument(
+        "--setup-browser",
+        action="store_true",
+        help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ "
+             "for browser tool support. Idempotent.",
+    )
+    parser.add_argument(
+        "--yes",
+        "-y",
+        action="store_true",
+        dest="assume_yes",
+        help="Accept all prompts (currently used by --setup-browser to skip the "
+             "~400 MB Chromium download confirmation).",
+    )
     return parser.parse_args(argv)
 
 
@@ -150,6 +164,75 @@ def _run_setup() -> None:
     finally:
         sys.argv = old_argv
 
+    # Offer browser-tools install as a follow-up. The terminal auth method
+    # is the one supported first-run UX for registry installs, so this is
+    # the natural moment to ask. Skip silently if stdin isn't a TTY (the
+    # answer can't be collected anyway).
+    if not sys.stdin.isatty():
+        return
+    try:
+        reply = input(
+            "\nInstall browser tools? Downloads agent-browser (npm) and "
+            "optionally Playwright Chromium (~400 MB). [y/N] "
+        ).strip().lower()
+    except (EOFError, KeyboardInterrupt):
+        return
+    if reply in {"y", "yes"}:
+        _run_setup_browser(assume_yes=False)
+
+
+def _run_setup_browser(assume_yes: bool = False) -> int:
+    """Bootstrap agent-browser + Playwright Chromium for the registry-install path.
+
+    Shells out to the bundled platform-specific bootstrap script
+    (acp_adapter/bootstrap/bootstrap_browser_tools.{sh,ps1}) so the install
+    logic lives in one place — readable, debuggable, and shareable with
+    install.sh / install.ps1 if we ever want to call it from there too.
+
+    Returns the script's exit code (0 on success).
+    """
+    import platform
+    import subprocess
+
+    bootstrap_dir = Path(__file__).resolve().parent / "bootstrap"
+
+    if platform.system() == "Windows":
+        script = bootstrap_dir / "bootstrap_browser_tools.ps1"
+        if not script.is_file():
+            print(
+                f"Bootstrap script not found at {script} — wheel may be incomplete.",
+                file=sys.stderr,
+            )
+            return 1
+        cmd = [
+            "powershell.exe",
+            "-NoProfile",
+            "-ExecutionPolicy", "Bypass",
+            "-File", str(script),
+        ]
+        if assume_yes:
+            cmd.append("-Yes")
+    else:
+        script = bootstrap_dir / "bootstrap_browser_tools.sh"
+        if not script.is_file():
+            print(
+                f"Bootstrap script not found at {script} — wheel may be incomplete.",
+                file=sys.stderr,
+            )
+            return 1
+        cmd = ["bash", str(script)]
+        if assume_yes:
+            cmd.append("--yes")
+
+    # stdio is inherited so the user sees the bootstrap's progress live.
+    try:
+        result = subprocess.run(cmd, check=False)
+    except FileNotFoundError as exc:
+        # bash / powershell.exe not on PATH
+        print(f"Could not launch browser bootstrap: {exc}", file=sys.stderr)
+        return 1
+    return result.returncode
+
 
 def main(argv: list[str] | None = None) -> None:
     """Entry point: load env, configure logging, run the ACP agent."""
@@ -163,6 +246,11 @@ def main(argv: list[str] | None = None) -> None:
     if args.setup:
         _run_setup()
         return
+    if args.setup_browser:
+        rc = _run_setup_browser(assume_yes=args.assume_yes)
+        if rc != 0:
+            sys.exit(rc)
+        return
 
     _setup_logging()
     _load_env()
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 6b770edaf28..833172a23b9 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -11715,6 +11715,20 @@ Examples:
         action="store_true",
         help="Run interactive Hermes provider/model setup for ACP terminal auth",
     )
+    acp_parser.add_argument(
+        "--setup-browser",
+        action="store_true",
+        help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ "
+             "for browser tool support (idempotent).",
+    )
+    acp_parser.add_argument(
+        "--yes",
+        "-y",
+        action="store_true",
+        dest="assume_yes",
+        help="Accept all prompts (used by --setup-browser to skip the "
+             "~400 MB Chromium download confirmation).",
+    )
 
     def cmd_acp(args):
         """Launch Hermes Agent as an ACP server."""
@@ -11728,6 +11742,10 @@ Examples:
                 acp_argv.append("--check")
             if getattr(args, "setup", False):
                 acp_argv.append("--setup")
+            if getattr(args, "setup_browser", False):
+                acp_argv.append("--setup-browser")
+            if getattr(args, "assume_yes", False):
+                acp_argv.append("--yes")
             acp_main(acp_argv)
         except ImportError:
             print("ACP dependencies not installed.", file=sys.stderr)
diff --git a/pyproject.toml b/pyproject.toml
index 20fecac228e..ae2fff385a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -212,9 +212,10 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
 [tool.setuptools.package-data]
 hermes_cli = ["web_dist/**/*"]
 gateway = ["assets/**/*"]
+acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"]
 
 [tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "acp_adapter.*", "plugins", "plugins.*", "providers", "providers.*"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/tests/acp/test_entry.py b/tests/acp/test_entry.py
index 4c7e55f1d4b..81d30cd868c 100644
--- a/tests/acp/test_entry.py
+++ b/tests/acp/test_entry.py
@@ -1,6 +1,9 @@
 """Tests for acp_adapter.entry startup wiring."""
 
+import sys
+
 import acp
+import pytest
 
 from acp_adapter import entry
 
@@ -42,12 +45,152 @@ def test_main_setup_runs_model_configuration(monkeypatch):
     calls = {}
 
     def fake_hermes_main():
-        import sys
-
         calls["argv"] = sys.argv[:]
 
     monkeypatch.setattr("hermes_cli.main.main", fake_hermes_main)
+    # Pretend stdin is not a TTY so the follow-up browser prompt is skipped.
+    # That keeps this test focused on the model-setup wiring; the
+    # browser-prompt path has its own test below.
+    monkeypatch.setattr("sys.stdin.isatty", lambda: False)
 
     entry.main(["--setup"])
 
     assert calls["argv"][1:] == ["model"]
+
+
+def test_main_setup_offers_browser_install_when_tty(monkeypatch):
+    """When stdin is a TTY and the user answers yes, model setup is followed
+    by a browser-tools bootstrap call."""
+    monkeypatch.setattr("hermes_cli.main.main", lambda: None)
+    monkeypatch.setattr("sys.stdin.isatty", lambda: True)
+    monkeypatch.setattr("builtins.input", lambda *_args, **_kwargs: "y")
+
+    bootstrap_calls = []
+    monkeypatch.setattr(
+        entry,
+        "_run_setup_browser",
+        lambda assume_yes=False: bootstrap_calls.append(assume_yes) or 0,
+    )
+
+    entry.main(["--setup"])
+
+    assert bootstrap_calls == [False]
+
+
+def test_main_setup_skips_browser_prompt_on_no(monkeypatch):
+    monkeypatch.setattr("hermes_cli.main.main", lambda: None)
+    monkeypatch.setattr("sys.stdin.isatty", lambda: True)
+    monkeypatch.setattr("builtins.input", lambda *_args, **_kwargs: "")
+
+    called = []
+    monkeypatch.setattr(
+        entry,
+        "_run_setup_browser",
+        lambda assume_yes=False: called.append(assume_yes) or 0,
+    )
+
+    entry.main(["--setup"])
+
+    assert called == []
+
+
+def test_main_setup_browser_invokes_bundled_script(monkeypatch):
+    """`hermes-acp --setup-browser` must shell out to the bundled bootstrap
+    script — never reimplement the install logic inline."""
+    monkeypatch.setattr("platform.system", lambda: "Linux")
+
+    captured = {}
+
+    def fake_run(cmd, check=False):
+        captured["cmd"] = cmd
+
+        class _R:
+            returncode = 0
+
+        return _R()
+
+    monkeypatch.setattr("subprocess.run", fake_run)
+
+    entry.main(["--setup-browser"])
+
+    assert captured["cmd"][0] == "bash"
+    assert captured["cmd"][1].endswith("bootstrap_browser_tools.sh")
+    # --yes is NOT passed when the flag is absent.
+    assert "--yes" not in captured["cmd"]
+
+
+def test_main_setup_browser_forwards_yes_flag(monkeypatch):
+    monkeypatch.setattr("platform.system", lambda: "Linux")
+
+    captured = {}
+
+    def fake_run(cmd, check=False):
+        captured["cmd"] = cmd
+
+        class _R:
+            returncode = 0
+
+        return _R()
+
+    monkeypatch.setattr("subprocess.run", fake_run)
+
+    entry.main(["--setup-browser", "--yes"])
+
+    assert "--yes" in captured["cmd"]
+
+
+def test_main_setup_browser_uses_powershell_on_windows(monkeypatch):
+    monkeypatch.setattr("platform.system", lambda: "Windows")
+
+    captured = {}
+
+    def fake_run(cmd, check=False):
+        captured["cmd"] = cmd
+
+        class _R:
+            returncode = 0
+
+        return _R()
+
+    monkeypatch.setattr("subprocess.run", fake_run)
+
+    entry.main(["--setup-browser", "--yes"])
+
+    assert captured["cmd"][0] == "powershell.exe"
+    assert any(part.endswith("bootstrap_browser_tools.ps1") for part in captured["cmd"])
+    assert "-Yes" in captured["cmd"]
+
+
+def test_main_setup_browser_propagates_failure(monkeypatch):
+    monkeypatch.setattr("platform.system", lambda: "Linux")
+
+    class _R:
+        returncode = 7
+
+    monkeypatch.setattr("subprocess.run", lambda cmd, check=False: _R())
+
+    with pytest.raises(SystemExit) as excinfo:
+        entry.main(["--setup-browser"])
+    assert excinfo.value.code == 7
+
+
+def test_bootstrap_scripts_ship_with_package():
+    """The package-data wiring (pyproject.toml) must include the bootstrap
+    scripts — otherwise `--setup-browser` 404s at runtime."""
+    from pathlib import Path
+
+    bootstrap_dir = Path(entry.__file__).resolve().parent / "bootstrap"
+    sh = bootstrap_dir / "bootstrap_browser_tools.sh"
+    ps1 = bootstrap_dir / "bootstrap_browser_tools.ps1"
+
+    assert sh.is_file(), f"missing bundled script: {sh}"
+    assert ps1.is_file(), f"missing bundled script: {ps1}"
+
+    sh_text = sh.read_text(encoding="utf-8")
+    ps1_text = ps1.read_text(encoding="utf-8")
+
+    # Sanity: scripts know how to find the Hermes-managed Node prefix.
+    assert "HERMES_HOME" in sh_text
+    assert "agent-browser" in sh_text
+    assert "HermesHome" in ps1_text
+    assert "agent-browser" in ps1_text
diff --git a/website/docs/user-guide/features/acp.md b/website/docs/user-guide/features/acp.md
index 92a755c9ada..6540748c889 100644
--- a/website/docs/user-guide/features/acp.md
+++ b/website/docs/user-guide/features/acp.md
@@ -78,6 +78,27 @@ hermes acp --version
 hermes acp --check
 ```
 
+### Browser tools (optional)
+
+Browser tools (`browser_navigate`, `browser_click`, etc.) depend on the
+`agent-browser` npm package and Chromium, which aren't part of the Python
+wheel. Install them with:
+
+```bash
+hermes acp --setup-browser           # interactive (prompts before ~400 MB download)
+hermes acp --setup-browser --yes     # accept the download non-interactively
+```
+
+This is the standalone command. The Zed registry's terminal-auth flow (`hermes acp --setup`) also offers the browser bootstrap as a follow-up question after model selection, so most users never need to run `--setup-browser` directly.
+
+What it does:
+
+- Installs Node.js 22 LTS into `~/.hermes/node/` if missing
+- `npm install -g agent-browser @askjo/camofox-browser` into that prefix (no sudo needed — `npm`'s `--prefix` points at the user-writable Hermes-managed Node)
+- Installs Playwright Chromium, or uses a detected system Chrome/Chromium when available
+
+The bootstrap is idempotent — re-running it is fast and skips work that's already done.
+
 ## Editor setup
 
 ### VS Code

From 09d9724a09197b1981c318f3c51c55bc52fdfe29 Mon Sep 17 00:00:00 2001
From: Mibayy <louismichalot@hotmail.com>
Date: Fri, 15 May 2026 01:33:49 -0700
Subject: [PATCH 022/218] feat(gateway): add SimpleX Chat platform plugin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SimpleX Chat (https://simplex.chat) is a private, decentralised messenger
with no persistent user IDs — every contact is identified by an opaque
internal ID generated at connection time. This adds it as a Hermes
gateway platform via the plugin system.

The adapter connects to a local simplex-chat daemon via WebSocket,
listens for inbound messages, and sends replies. Originally proposed in
PR #2558 as a core-modifying integration; reshaped here as a self-
contained plugin under plugins/platforms/simplex/ with no edits to any
core file. Discovery is filesystem-based (scanned by gateway.config),
and the platform identity is resolved on demand via Platform("simplex").

Plugin contract:
- check_requirements() requires SIMPLEX_WS_URL AND the websockets package
- validate_config() / is_connected() accept env or config.yaml input
- _env_enablement() seeds PlatformConfig.extra (ws_url + home_channel)
- _standalone_send() supports out-of-process cron delivery
- interactive_setup() provides a stdin wizard for hermes gateway setup
- register() wires the adapter into the registry with required_env,
  install_hint, cron_deliver_env_var, allowed_users_env, and a
  platform_hint for the LLM.

Lazy dependency: the websockets Python package is imported inside the
functions that need it. The plugin is importable and discoverable even
when websockets is missing — check_requirements() simply returns False
until `pip install websockets` is run. No new pyproject extras are
introduced.

Environment variables:
  SIMPLEX_WS_URL             WebSocket URL of the daemon (required)
  SIMPLEX_ALLOWED_USERS      Comma-separated allowed contact IDs
  SIMPLEX_ALLOW_ALL_USERS    Set true to allow all contacts
  SIMPLEX_HOME_CHANNEL       Default contact for cron delivery
  SIMPLEX_HOME_CHANNEL_NAME  Human label for the home channel

Closes #2557.
---
 plugins/platforms/simplex/__init__.py        |   3 +
 plugins/platforms/simplex/adapter.py         | 746 +++++++++++++++++++
 plugins/platforms/simplex/plugin.yaml        |  37 +
 tests/gateway/test_simplex_plugin.py         | 347 +++++++++
 website/docs/user-guide/messaging/simplex.md |  99 +++
 5 files changed, 1232 insertions(+)
 create mode 100644 plugins/platforms/simplex/__init__.py
 create mode 100644 plugins/platforms/simplex/adapter.py
 create mode 100644 plugins/platforms/simplex/plugin.yaml
 create mode 100644 tests/gateway/test_simplex_plugin.py
 create mode 100644 website/docs/user-guide/messaging/simplex.md

diff --git a/plugins/platforms/simplex/__init__.py b/plugins/platforms/simplex/__init__.py
new file mode 100644
index 00000000000..d4f1d7bf0e3
--- /dev/null
+++ b/plugins/platforms/simplex/__init__.py
@@ -0,0 +1,3 @@
+from .adapter import register
+
+__all__ = ["register"]
diff --git a/plugins/platforms/simplex/adapter.py b/plugins/platforms/simplex/adapter.py
new file mode 100644
index 00000000000..b568f29bbb5
--- /dev/null
+++ b/plugins/platforms/simplex/adapter.py
@@ -0,0 +1,746 @@
+"""SimpleX Chat platform adapter (Hermes plugin).
+
+Connects to a simplex-chat daemon running in WebSocket mode.
+Inbound messages arrive via a persistent WebSocket connection.
+Outbound messages use the same WebSocket with JSON commands.
+
+This adapter ships as a Hermes platform plugin under
+``plugins/platforms/simplex/``. The Hermes plugin loader scans the
+directory at startup, calls ``register(ctx)``, and the platform
+becomes available to ``gateway/run.py`` and ``tools/send_message_tool``
+through the registry — no edits to core files are required.
+
+SimpleX chat daemon setup:
+    simplex-chat -p 5225          # start daemon on port 5225
+    # or via Docker:
+    # docker run -p 5225:5225 simplexchat/simplex-chat-cli -p 5225
+
+Required environment variables:
+    SIMPLEX_WS_URL             WebSocket URL of the daemon
+                               (default: ws://127.0.0.1:5225)
+
+Optional environment variables:
+    SIMPLEX_ALLOWED_USERS      Comma-separated contact IDs (allowlist)
+    SIMPLEX_ALLOW_ALL_USERS    Set 'true' to allow all contacts
+    SIMPLEX_HOME_CHANNEL       Default contact/group ID for cron delivery
+    SIMPLEX_HOME_CHANNEL_NAME  Human label for the home channel
+
+The ``websockets`` Python package is imported lazily — the plugin is
+discoverable and `hermes setup` can describe it even when websockets is
+not installed. ``check_requirements()`` returns False until the package
+is present, so the gateway will not attempt to instantiate the adapter.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import random
+import time
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+# Lazy import: BasePlatformAdapter and friends live in the main repo.
+# Imported at module top because they're stdlib-only inside Hermes — no
+# external dependency that would block the plugin from loading.
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+    cache_image_from_bytes,
+    cache_audio_from_bytes,
+    cache_document_from_bytes,
+)
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+MAX_MESSAGE_LENGTH = 16_000  # SimpleX has no hard limit; keep chunking sane
+TYPING_INTERVAL = 10.0
+WS_RETRY_DELAY_INITIAL = 2.0
+WS_RETRY_DELAY_MAX = 60.0
+HEALTH_CHECK_INTERVAL = 30.0
+HEALTH_CHECK_STALE_THRESHOLD = 120.0
+
+# Correlation ID prefix for requests we send so we can ignore our own echoes.
+_CORR_PREFIX = "hermes-"
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _parse_comma_list(value: str) -> List[str]:
+    """Split a comma-separated string into a stripped list."""
+    return [v.strip() for v in value.split(",") if v.strip()]
+
+
+def _guess_extension(data: bytes) -> str:
+    """Guess file extension from magic bytes."""
+    if data[:4] == b"\x89PNG":
+        return ".png"
+    if data[:2] == b"\xff\xd8":
+        return ".jpg"
+    if data[:4] == b"GIF8":
+        return ".gif"
+    if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
+        return ".webp"
+    if data[:4] == b"%PDF":
+        return ".pdf"
+    if len(data) >= 8 and data[4:8] == b"ftyp":
+        return ".mp4"
+    if data[:4] == b"OggS":
+        return ".ogg"
+    if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0:
+        return ".mp3"
+    return ".bin"
+
+
+def _is_image_ext(ext: str) -> bool:
+    return ext.lower() in (".jpg", ".jpeg", ".png", ".gif", ".webp")
+
+
+def _is_audio_ext(ext: str) -> bool:
+    return ext.lower() in (".mp3", ".wav", ".ogg", ".m4a", ".aac")
+
+
+# ---------------------------------------------------------------------------
+# SimpleX Adapter
+# ---------------------------------------------------------------------------
+
+class SimplexAdapter(BasePlatformAdapter):
+    """SimpleX Chat adapter using the simplex-chat daemon WebSocket API.
+
+    Instantiated by the ``adapter_factory`` passed to
+    ``ctx.register_platform()`` in :func:`register`.
+    """
+
+    def __init__(self, config: PlatformConfig, **kwargs):
+        platform = Platform("simplex")
+        super().__init__(config=config, platform=platform)
+
+        extra = getattr(config, "extra", {}) or {}
+        self.ws_url = extra.get("ws_url", "ws://127.0.0.1:5225").rstrip("/")
+
+        # Running state
+        self._ws = None  # websockets connection
+        self._ws_task: Optional[asyncio.Task] = None
+        self._health_task: Optional[asyncio.Task] = None
+        self._typing_tasks: Dict[str, asyncio.Task] = {}
+        self._running = False
+        self._last_ws_activity = 0.0
+
+        # Track sent correlation IDs to filter echoes
+        self._pending_corr_ids: set = set()
+        self._max_pending_corr = 200
+
+        logger.info("SimpleX adapter initialized: url=%s", self.ws_url)
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    async def connect(self) -> bool:
+        """Connect to the simplex-chat daemon and start the WebSocket listener."""
+        try:
+            import websockets  # noqa: F401
+        except ImportError:
+            logger.error(
+                "SimpleX: 'websockets' package not installed. "
+                "Run: pip install websockets"
+            )
+            return False
+
+        if not self.ws_url:
+            logger.error("SimpleX: SIMPLEX_WS_URL is required")
+            return False
+
+        # Quick connectivity check — try to open and immediately close
+        try:
+            import websockets as _wsclient
+            async with _wsclient.connect(self.ws_url, open_timeout=10):
+                pass
+        except Exception as e:
+            logger.error("SimpleX: cannot reach daemon at %s: %s", self.ws_url, e)
+            return False
+
+        self._running = True
+        self._last_ws_activity = time.time()
+        self._ws_task = asyncio.create_task(self._ws_listener())
+        self._health_task = asyncio.create_task(self._health_monitor())
+
+        logger.info("SimpleX: connected to %s", self.ws_url)
+        return True
+
+    async def disconnect(self) -> None:
+        """Stop WebSocket listener and clean up."""
+        self._running = False
+
+        if self._ws_task:
+            self._ws_task.cancel()
+            try:
+                await self._ws_task
+            except asyncio.CancelledError:
+                pass
+
+        if self._health_task:
+            self._health_task.cancel()
+            try:
+                await self._health_task
+            except asyncio.CancelledError:
+                pass
+
+        for task in self._typing_tasks.values():
+            task.cancel()
+        self._typing_tasks.clear()
+
+        if self._ws:
+            try:
+                await self._ws.close()
+            except Exception:
+                pass
+            self._ws = None
+
+        logger.info("SimpleX: disconnected")
+
+    # ------------------------------------------------------------------
+    # WebSocket listener
+    # ------------------------------------------------------------------
+
+    async def _ws_listener(self) -> None:
+        """Maintain a persistent WebSocket connection to the daemon."""
+        import websockets as _wsclient
+        import websockets as _wsexc
+
+        backoff = WS_RETRY_DELAY_INITIAL
+
+        while self._running:
+            try:
+                logger.debug("SimpleX WS: connecting to %s", self.ws_url)
+                async with _wsclient.connect(
+                    self.ws_url,
+                    ping_interval=20,
+                    ping_timeout=20,
+                ) as ws:
+                    self._ws = ws
+                    backoff = WS_RETRY_DELAY_INITIAL
+                    self._last_ws_activity = time.time()
+                    logger.info("SimpleX WS: connected")
+
+                    async for raw in ws:
+                        if not self._running:
+                            break
+                        self._last_ws_activity = time.time()
+                        try:
+                            msg = json.loads(raw)
+                            await self._handle_event(msg)
+                        except json.JSONDecodeError:
+                            logger.debug("SimpleX WS: invalid JSON: %.100s", raw)
+                        except Exception:
+                            logger.exception("SimpleX WS: error handling event")
+
+            except asyncio.CancelledError:
+                break
+            except _wsexc.WebSocketException as e:
+                if self._running:
+                    logger.warning(
+                        "SimpleX WS: error: %s (reconnecting in %.0fs)", e, backoff
+                    )
+            except Exception as e:
+                if self._running:
+                    logger.warning(
+                        "SimpleX WS: unexpected error: %s (reconnecting in %.0fs)",
+                        e, backoff,
+                    )
+            finally:
+                self._ws = None
+
+            if self._running:
+                jitter = backoff * 0.2 * random.random()
+                await asyncio.sleep(backoff + jitter)
+                backoff = min(backoff * 2, WS_RETRY_DELAY_MAX)
+
+    # ------------------------------------------------------------------
+    # Health monitor
+    # ------------------------------------------------------------------
+
+    async def _health_monitor(self) -> None:
+        """Force reconnect if the WebSocket has been idle too long."""
+        while self._running:
+            await asyncio.sleep(HEALTH_CHECK_INTERVAL)
+            if not self._running:
+                break
+
+            elapsed = time.time() - self._last_ws_activity
+            if elapsed > HEALTH_CHECK_STALE_THRESHOLD:
+                logger.warning(
+                    "SimpleX: WS idle for %.0fs, forcing reconnect", elapsed
+                )
+                self._last_ws_activity = time.time()
+                if self._ws:
+                    try:
+                        await self._ws.close()
+                    except Exception:
+                        pass
+
+    # ------------------------------------------------------------------
+    # Inbound event handling
+    # ------------------------------------------------------------------
+
+    async def _handle_event(self, event: dict) -> None:
+        """Dispatch a daemon event to the appropriate handler."""
+        resp_type = event.get("type") or event.get("resp", {}).get("type", "")
+
+        # Filter responses to our own commands (echoes)
+        corr_id = event.get("corrId", "")
+        if corr_id and corr_id.startswith(_CORR_PREFIX):
+            self._pending_corr_ids.discard(corr_id)
+            return
+
+        if resp_type == "newChatItem":
+            await self._handle_new_chat_item(event)
+        elif resp_type == "newChatItems":
+            # Batch variant — process each item
+            items = event.get("chatItems") or []
+            for item_wrapper in items:
+                await self._handle_new_chat_item(item_wrapper)
+        # Ignore all other event types (delivery receipts, contact updates, etc.)
+
+    async def _handle_new_chat_item(self, wrapper: dict) -> None:
+        """Process a single newChatItem event into a MessageEvent."""
+        # The daemon wraps the chat item differently depending on version;
+        # normalise both layouts.
+        chat_info = wrapper.get("chatInfo") or wrapper.get("chat") or {}
+        chat_item = wrapper.get("chatItem") or wrapper.get("item") or {}
+
+        # Only process messages (not calls, deleted items, etc.)
+        item_content = chat_item.get("content") or {}
+        msg_content = item_content.get("msgContent") or {}
+        if not msg_content:
+            return
+
+        # Filter out messages sent by us (direction == "snd")
+        meta = chat_item.get("meta") or {}
+        direction = (meta.get("itemStatus") or {}).get("type", "")
+        if direction in ("sndSent", "sndSentDirect", "sndSentViaProxy", "sndNew"):
+            return
+
+        # Determine chat type and IDs
+        chat_type_raw = chat_info.get("type", "")
+        is_group = chat_type_raw in ("group", "groupInfo")
+
+        if is_group:
+            group_info = chat_info.get("groupInfo") or chat_info.get("group") or {}
+            group_id = str(group_info.get("groupId") or group_info.get("id") or "")
+            group_name = group_info.get("displayName") or group_info.get("groupProfile", {}).get("displayName", "")
+            chat_id = f"group:{group_id}" if group_id else ""
+            chat_name = group_name
+        else:
+            contact_info = chat_info.get("contact") or {}
+            contact_id = str(contact_info.get("contactId") or contact_info.get("id") or "")
+            contact_name = (
+                contact_info.get("displayName")
+                or contact_info.get("localDisplayName")
+                or contact_id
+            )
+            chat_id = contact_id
+            chat_name = contact_name
+
+        if not chat_id:
+            logger.debug("SimpleX: ignoring event with no chat_id")
+            return
+
+        # Sender — for groups the message includes a chatItemMember sub-object
+        member = chat_item.get("chatItemMember") or {}
+        if is_group and member:
+            sender_id = str(member.get("memberId") or member.get("id") or chat_id)
+            sender_name = (
+                member.get("displayName")
+                or member.get("localDisplayName")
+                or sender_id
+            )
+        else:
+            sender_id = chat_id
+            sender_name = chat_name
+
+        # Extract text
+        text = msg_content.get("text") or ""
+
+        # Media attachments
+        media_urls: List[str] = []
+        media_types: List[str] = []
+        file_info = chat_item.get("file") or {}
+        if file_info and file_info.get("fileStatus") not in ("cancelled", "error"):
+            file_id = file_info.get("fileId")
+            file_name = file_info.get("fileName", "file")
+            if file_id:
+                try:
+                    cached = await self._fetch_file(file_id, file_name)
+                    if cached:
+                        ext = cached.rsplit(".", 1)[-1]
+                        if _is_image_ext("." + ext):
+                            media_types.append("image/" + ext.replace("jpg", "jpeg"))
+                        elif _is_audio_ext("." + ext):
+                            media_types.append("audio/" + ext)
+                        else:
+                            media_types.append("application/octet-stream")
+                        media_urls.append(cached)
+                except Exception:
+                    logger.exception("SimpleX: failed to fetch file %s", file_id)
+
+        # Timestamp
+        ts_str = meta.get("itemTs") or meta.get("createdAt") or ""
+        try:
+            timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+        except (ValueError, AttributeError):
+            timestamp = datetime.now(tz=timezone.utc)
+
+        # Build source
+        source = self.build_source(
+            chat_id=chat_id,
+            chat_name=chat_name,
+            chat_type="group" if is_group else "dm",
+            user_id=sender_id,
+            user_name=sender_name,
+        )
+
+        # Message type
+        msg_type = MessageType.TEXT
+        if media_types:
+            if any(mt.startswith("audio/") for mt in media_types):
+                msg_type = MessageType.VOICE
+            elif any(mt.startswith("image/") for mt in media_types):
+                msg_type = MessageType.PHOTO
+
+        event_obj = MessageEvent(
+            source=source,
+            text=text,
+            message_type=msg_type,
+            media_urls=media_urls,
+            media_types=media_types,
+            timestamp=timestamp,
+            raw_message=wrapper,
+        )
+
+        await self.handle_message(event_obj)
+
+    async def _fetch_file(self, file_id: Any, file_name: str) -> Optional[str]:
+        """Ask the daemon to receive and return a file attachment."""
+        # simplex-chat exposes `/api/v1/files/{fileId}` on an HTTP port
+        # when started with --http-port. However, the canonical WebSocket API
+        # does not have a direct binary download command; files are stored on
+        # the local filesystem after the daemon accepts them.
+        #
+        # We request acceptance first, then read from the daemon's local path.
+        corr_id = self._make_corr_id()
+        cmd = {
+            "corrId": corr_id,
+            "cmd": f"/freceive {file_id}",
+        }
+        await self._send_ws(cmd)
+        # The daemon will emit a chatItemUpdated event when the file lands;
+        # for simplicity we just wait briefly and rely on the daemon's default path.
+        await asyncio.sleep(2)
+
+        # simplex-chat stores received files in ~/Downloads or a configured path.
+        # We try common locations.
+        for search_dir in (
+            os.path.expanduser("~/Downloads"),
+            os.path.expanduser("~/.simplex/files"),
+            "/tmp/simplex_files",
+        ):
+            candidate = os.path.join(search_dir, file_name)
+            if os.path.exists(candidate):
+                with open(candidate, "rb") as f:
+                    data = f.read()
+                ext = _guess_extension(data)
+                if _is_image_ext(ext):
+                    return cache_image_from_bytes(data, ext)
+                elif _is_audio_ext(ext):
+                    return cache_audio_from_bytes(data, ext)
+                else:
+                    return cache_document_from_bytes(data, file_name)
+        return None
+
+    # ------------------------------------------------------------------
+    # Outbound messages
+    # ------------------------------------------------------------------
+
+    def _make_corr_id(self) -> str:
+        """Generate a unique correlation ID for a request."""
+        corr_id = f"{_CORR_PREFIX}{int(time.time() * 1000)}-{random.randint(0, 9999)}"
+        self._pending_corr_ids.add(corr_id)
+        if len(self._pending_corr_ids) > self._max_pending_corr:
+            # Trim oldest — sets are unordered so just clear the oldest half
+            to_remove = list(self._pending_corr_ids)[:self._max_pending_corr // 2]
+            self._pending_corr_ids -= set(to_remove)
+        return corr_id
+
+    async def _send_ws(self, payload: dict) -> None:
+        """Send a JSON payload over the WebSocket, queuing if not yet connected."""
+        import websockets as _wsexc
+        ws = self._ws
+        if not ws:
+            logger.debug("SimpleX: WS not connected, dropping outbound command")
+            return
+        try:
+            await ws.send(json.dumps(payload))
+        except _wsexc.ConnectionClosed:
+            logger.warning("SimpleX: WS closed while sending")
+        except Exception as e:
+            logger.warning("SimpleX: WS send error: %s", e)
+
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a text message to a contact or group."""
+        corr_id = self._make_corr_id()
+
+        if chat_id.startswith("group:"):
+            group_id = chat_id[6:]
+            cmd_str = f"#[{group_id}] {content}"
+        else:
+            cmd_str = f"@[{chat_id}] {content}"
+
+        payload = {
+            "corrId": corr_id,
+            "cmd": cmd_str,
+        }
+
+        await self._send_ws(payload)
+        return SendResult(success=True)
+
+    async def send_typing(self, chat_id: str, metadata=None) -> None:
+        """SimpleX does not expose a typing indicator API — no-op."""
+        pass
+
+    async def send_image(
+        self,
+        chat_id: str,
+        image_url: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an image (URL) as a message with optional caption.
+
+        SimpleX has no native ``send_image`` over the WebSocket API — file
+        attachments require the daemon's filesystem-backed flow which is
+        not driven from this adapter. Fall back to a plain text message
+        containing the URL and caption.
+        """
+        text = f"{caption}\n{image_url}".strip() if caption else image_url
+        return await self.send(chat_id, text, reply_to=reply_to, metadata=metadata)
+
+    async def get_chat_info(self, chat_id: str) -> dict:
+        """Return basic chat info."""
+        if chat_id.startswith("group:"):
+            return {"chat_id": chat_id, "type": "group", "name": chat_id[6:]}
+        return {"chat_id": chat_id, "type": "dm", "name": chat_id}
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry-point hooks
+# ---------------------------------------------------------------------------
+
+def check_requirements() -> bool:
+    """Plugin gate: require SIMPLEX_WS_URL AND the websockets package.
+
+    Returning False keeps the platform out of ``get_connected_platforms()``
+    so the gateway never instantiates the adapter when the dependency is
+    missing or no daemon URL is configured.
+    """
+    if not os.getenv("SIMPLEX_WS_URL"):
+        return False
+    try:
+        import websockets  # noqa: F401
+    except ImportError:
+        return False
+    return True
+
+
+def validate_config(config) -> bool:
+    """Validate that the platform config has enough info to connect."""
+    extra = getattr(config, "extra", {}) or {}
+    ws_url = os.getenv("SIMPLEX_WS_URL") or extra.get("ws_url", "")
+    return bool(ws_url)
+
+
+def is_connected(config) -> bool:
+    """Check whether SimpleX is configured (env or config.yaml)."""
+    extra = getattr(config, "extra", {}) or {}
+    ws_url = os.getenv("SIMPLEX_WS_URL") or extra.get("ws_url", "")
+    return bool(ws_url)
+
+
+def _env_enablement() -> dict | None:
+    """Seed ``PlatformConfig.extra`` from env vars during gateway config load.
+
+    Called by the platform registry's env-enablement hook BEFORE adapter
+    construction, so ``gateway status`` and ``get_connected_platforms()``
+    reflect env-only configuration without instantiating the WebSocket
+    client. Returns ``None`` when SimpleX isn't minimally configured.
+
+    The special ``home_channel`` key in the returned dict is handled by
+    the core hook — it becomes a proper ``HomeChannel`` dataclass on the
+    ``PlatformConfig`` rather than being merged into ``extra``.
+    """
+    ws_url = os.getenv("SIMPLEX_WS_URL", "").strip()
+    if not ws_url:
+        return None
+    seed: dict = {"ws_url": ws_url}
+    home = os.getenv("SIMPLEX_HOME_CHANNEL", "").strip()
+    if home:
+        seed["home_channel"] = {
+            "chat_id": home,
+            "name": os.getenv("SIMPLEX_HOME_CHANNEL_NAME", "").strip() or home,
+        }
+    return seed
+
+
+async def _standalone_send(
+    pconfig,
+    chat_id: str,
+    message: str,
+    *,
+    thread_id: Optional[str] = None,
+    media_files: Optional[List[str]] = None,
+    force_document: bool = False,
+) -> Dict[str, Any]:
+    """Open an ephemeral WebSocket to the daemon, send, and close.
+
+    Used by ``tools/send_message_tool._send_via_adapter`` when the gateway
+    runner is not in this process (e.g. ``hermes cron`` running as a
+    separate process from ``hermes gateway``). Without this hook,
+    ``deliver=simplex`` cron jobs fail with "No live adapter for platform".
+
+    ``thread_id`` and ``force_document`` are accepted for signature parity
+    with other plugins but are not meaningful here. ``media_files`` is
+    accepted but only the text body is delivered — SimpleX requires the
+    daemon's filesystem-backed file flow which an ephemeral connection
+    cannot drive safely.
+    """
+    try:
+        import websockets as _wsclient
+    except ImportError:
+        return {"error": "websockets not installed. Run: pip install websockets"}
+
+    extra = getattr(pconfig, "extra", {}) or {}
+    ws_url = os.getenv("SIMPLEX_WS_URL") or extra.get("ws_url", "ws://127.0.0.1:5225")
+    if not ws_url:
+        return {"error": "SimpleX standalone send: SIMPLEX_WS_URL is required"}
+
+    try:
+        if chat_id.startswith("group:"):
+            group_id = chat_id[6:]
+            cmd_str = f"#[{group_id}] {message}"
+        else:
+            cmd_str = f"@[{chat_id}] {message}"
+
+        payload = {
+            "corrId": f"hermes-snd-{int(time.time() * 1000)}",
+            "cmd": cmd_str,
+        }
+
+        async with _wsclient.connect(ws_url, open_timeout=10, close_timeout=5) as ws:
+            await ws.send(json.dumps(payload))
+            # Give the daemon a moment to process the command before closing.
+            await asyncio.sleep(0.5)
+
+        return {"success": True, "platform": "simplex", "chat_id": chat_id}
+    except Exception as e:
+        return {"error": f"SimpleX send failed: {e}"}
+
+
+def interactive_setup() -> None:
+    """Minimal stdin wizard for ``hermes setup gateway`` → SimpleX.
+
+    Prompts for the WebSocket URL and the optional allowlist / home channel.
+    Writes to ``~/.hermes/.env`` via ``hermes_cli.config``.
+    """
+    print()
+    print("SimpleX Chat setup")
+    print("------------------")
+    print("Requirements:")
+    print("  1. simplex-chat daemon running (e.g. `simplex-chat -p 5225`).")
+    print("  2. Python package `websockets` installed (`pip install websockets`).")
+    print()
+
+    try:
+        from hermes_cli.config import get_env_value, save_env_value
+    except ImportError:
+        print("hermes_cli.config not available; set SIMPLEX_* vars manually in ~/.hermes/.env")
+        return
+
+    def _prompt(var: str, prompt: str, *, secret: bool = False) -> None:
+        existing = get_env_value(var) if callable(get_env_value) else None
+        suffix = " [keep current]" if existing else ""
+        try:
+            if secret:
+                import getpass
+                value = getpass.getpass(f"{prompt}{suffix}: ")
+            else:
+                value = input(f"{prompt}{suffix}: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            print()
+            return
+        if value:
+            save_env_value(var, value)
+
+    _prompt("SIMPLEX_WS_URL", "Daemon WebSocket URL (default ws://127.0.0.1:5225)")
+    _prompt("SIMPLEX_ALLOWED_USERS", "Allowed contact IDs (comma-separated; blank=skip)")
+    _prompt("SIMPLEX_HOME_CHANNEL", "Home channel contact/group ID (or empty)")
+    print("Done. Make sure the simplex-chat daemon is running before starting the gateway.")
+
+
+def register(ctx) -> None:
+    """Plugin entry point — called by the Hermes plugin system at startup."""
+    ctx.register_platform(
+        name="simplex",
+        label="SimpleX Chat",
+        adapter_factory=lambda cfg: SimplexAdapter(cfg),
+        check_fn=check_requirements,
+        validate_config=validate_config,
+        is_connected=is_connected,
+        required_env=["SIMPLEX_WS_URL"],
+        install_hint="pip install websockets   # SimpleX adapter requires the websockets package",
+        setup_fn=interactive_setup,
+        # Env-driven auto-configuration: seeds PlatformConfig.extra so
+        # env-only setups show up in `hermes gateway status` without
+        # instantiating the adapter.
+        env_enablement_fn=_env_enablement,
+        # Cron home-channel delivery support — `deliver=simplex` cron jobs
+        # route to SIMPLEX_HOME_CHANNEL when set.
+        cron_deliver_env_var="SIMPLEX_HOME_CHANNEL",
+        # Out-of-process cron delivery. Without this hook, deliver=simplex
+        # cron jobs fail with "No live adapter" when cron runs separately
+        # from the gateway.
+        standalone_sender_fn=_standalone_send,
+        # Auth env vars for _is_user_authorized() integration
+        allowed_users_env="SIMPLEX_ALLOWED_USERS",
+        allow_all_env="SIMPLEX_ALLOW_ALL_USERS",
+        # SimpleX has no hard line length; we still chunk for sanity.
+        max_message_length=MAX_MESSAGE_LENGTH,
+        # Display
+        emoji="🔒",
+        # SimpleX uses opaque contact IDs only — no phone numbers or
+        # email addresses to redact.
+        pii_safe=True,
+        allow_update_command=True,
+        # LLM guidance
+        platform_hint=(
+            "You are chatting via SimpleX Chat, a private decentralised "
+            "messenger. Contacts are identified by opaque internal IDs, "
+            "not phone numbers or usernames. SimpleX supports standard "
+            "markdown formatting. There is no typing indicator and no "
+            "hard message length limit, but keep responses conversational."
+        ),
+    )
diff --git a/plugins/platforms/simplex/plugin.yaml b/plugins/platforms/simplex/plugin.yaml
new file mode 100644
index 00000000000..2bb87641b63
--- /dev/null
+++ b/plugins/platforms/simplex/plugin.yaml
@@ -0,0 +1,37 @@
+name: simplex-platform
+label: SimpleX Chat
+kind: platform
+version: 1.0.0
+description: >
+  SimpleX Chat gateway adapter for Hermes Agent.
+  Connects to a local simplex-chat daemon via WebSocket and relays
+  messages between SimpleX contacts/groups and the Hermes agent.
+  SimpleX is decentralised and assigns no persistent user IDs —
+  every contact is an opaque internal ID generated at connection
+  time, making it one of the most private messengers available.
+author: Mibayy
+# ``requires_env`` and ``optional_env`` entries are surfaced in the
+# ``hermes config`` UI via the platform-plugin env var injector in
+# ``hermes_cli/config.py``.
+requires_env:
+  - name: SIMPLEX_WS_URL
+    description: "WebSocket URL of the simplex-chat daemon (e.g. ws://127.0.0.1:5225)"
+    prompt: "SimpleX daemon WebSocket URL"
+    password: false
+optional_env:
+  - name: SIMPLEX_ALLOWED_USERS
+    description: "Comma-separated SimpleX contact IDs allowed to talk to the bot"
+    prompt: "Allowed contact IDs (comma-separated)"
+    password: false
+  - name: SIMPLEX_ALLOW_ALL_USERS
+    description: "Allow any contact to talk to the bot (dev only — disables allowlist)"
+    prompt: "Allow all contacts? (true/false)"
+    password: false
+  - name: SIMPLEX_HOME_CHANNEL
+    description: "Default contact/group ID for cron / notification delivery"
+    prompt: "Home channel contact/group ID (or empty)"
+    password: false
+  - name: SIMPLEX_HOME_CHANNEL_NAME
+    description: "Human label for the home channel (defaults to the ID)"
+    prompt: "Home channel display name (or empty)"
+    password: false
diff --git a/tests/gateway/test_simplex_plugin.py b/tests/gateway/test_simplex_plugin.py
new file mode 100644
index 00000000000..0b1b1b21a85
--- /dev/null
+++ b/tests/gateway/test_simplex_plugin.py
@@ -0,0 +1,347 @@
+"""Tests for the SimpleX Chat platform-plugin adapter.
+
+Loaded via the ``_plugin_adapter_loader`` helper so this lives under
+``plugin_adapter_simplex`` in ``sys.modules`` and cannot collide with
+sibling platform-plugin tests on the same xdist worker.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from tests.gateway._plugin_adapter_loader import load_plugin_adapter
+
+_simplex = load_plugin_adapter("simplex")
+
+SimplexAdapter = _simplex.SimplexAdapter
+check_requirements = _simplex.check_requirements
+validate_config = _simplex.validate_config
+is_connected = _simplex.is_connected
+register = _simplex.register
+_env_enablement = _simplex._env_enablement
+_standalone_send = _simplex._standalone_send
+_guess_extension = _simplex._guess_extension
+_is_image_ext = _simplex._is_image_ext
+_is_audio_ext = _simplex._is_audio_ext
+_CORR_PREFIX = _simplex._CORR_PREFIX
+
+
+# ---------------------------------------------------------------------------
+# 1. Platform enum (plugin-discovered, not bundled)
+# ---------------------------------------------------------------------------
+
+def test_platform_enum_resolves_via_plugin_scan():
+    """The plugin filesystem scan should expose Platform("simplex")."""
+    from gateway.config import Platform
+    p = Platform("simplex")
+    assert p.value == "simplex"
+    # Identity stability — repeated lookups return the same pseudo-member
+    assert Platform("simplex") is p
+
+
+# ---------------------------------------------------------------------------
+# 2. check_requirements / validate_config / is_connected
+# ---------------------------------------------------------------------------
+
+def test_check_requirements_needs_url(monkeypatch):
+    monkeypatch.delenv("SIMPLEX_WS_URL", raising=False)
+    assert check_requirements() is False
+
+
+def test_check_requirements_true_when_configured(monkeypatch):
+    monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225")
+    # websockets is a dev dep in this repo via the test plugins; the
+    # check_requirements() gate also asserts the package imports.
+    websockets_present = True
+    try:
+        import websockets  # noqa: F401
+    except ImportError:
+        websockets_present = False
+    assert check_requirements() is websockets_present
+
+
+def test_validate_config_uses_env_or_extra():
+    from gateway.config import PlatformConfig
+    # Empty extra + no env → invalid
+    cfg = PlatformConfig(enabled=True)
+    assert validate_config(cfg) is False
+    # extra-only path → valid
+    cfg2 = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    assert validate_config(cfg2) is True
+
+
+def test_is_connected_mirrors_validate(monkeypatch):
+    from gateway.config import PlatformConfig
+    monkeypatch.delenv("SIMPLEX_WS_URL", raising=False)
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://x"})
+    assert is_connected(cfg) is True
+    assert is_connected(PlatformConfig(enabled=True)) is False
+
+
+# ---------------------------------------------------------------------------
+# 3. _env_enablement seeds PlatformConfig.extra
+# ---------------------------------------------------------------------------
+
+def test_env_enablement_none_when_unset(monkeypatch):
+    monkeypatch.delenv("SIMPLEX_WS_URL", raising=False)
+    assert _env_enablement() is None
+
+
+def test_env_enablement_seeds_ws_url(monkeypatch):
+    monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225")
+    monkeypatch.delenv("SIMPLEX_HOME_CHANNEL", raising=False)
+    seed = _env_enablement()
+    assert seed == {"ws_url": "ws://127.0.0.1:5225"}
+
+
+def test_env_enablement_seeds_home_channel(monkeypatch):
+    monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225")
+    monkeypatch.setenv("SIMPLEX_HOME_CHANNEL", "42")
+    monkeypatch.setenv("SIMPLEX_HOME_CHANNEL_NAME", "Personal")
+    seed = _env_enablement()
+    assert seed["home_channel"] == {"chat_id": "42", "name": "Personal"}
+
+
+def test_env_enablement_home_channel_defaults_name_to_id(monkeypatch):
+    monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225")
+    monkeypatch.setenv("SIMPLEX_HOME_CHANNEL", "42")
+    monkeypatch.delenv("SIMPLEX_HOME_CHANNEL_NAME", raising=False)
+    seed = _env_enablement()
+    assert seed["home_channel"] == {"chat_id": "42", "name": "42"}
+
+
+# ---------------------------------------------------------------------------
+# 4. Adapter init
+# ---------------------------------------------------------------------------
+
+def test_adapter_init_custom_url():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    adapter = SimplexAdapter(cfg)
+    assert adapter.ws_url == "ws://localhost:5225"
+    assert adapter._running is False
+    assert adapter._ws is None
+
+
+def test_adapter_init_default_url():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True)
+    adapter = SimplexAdapter(cfg)
+    assert adapter.ws_url == "ws://127.0.0.1:5225"
+
+
+def test_adapter_platform_identity():
+    """Adapter should expose Platform("simplex") identity."""
+    from gateway.config import Platform, PlatformConfig
+    cfg = PlatformConfig(enabled=True)
+    adapter = SimplexAdapter(cfg)
+    assert adapter.platform is Platform("simplex")
+
+
+# ---------------------------------------------------------------------------
+# 5. Helper functions (magic-byte detection)
+# ---------------------------------------------------------------------------
+
+def test_guess_extension_png():
+    assert _guess_extension(b"\x89PNG\r\n\x1a\n") == ".png"
+
+
+def test_guess_extension_jpg():
+    assert _guess_extension(b"\xff\xd8\xff\xe0") == ".jpg"
+
+
+def test_guess_extension_ogg():
+    assert _guess_extension(b"OggS\x00\x02") == ".ogg"
+
+
+def test_guess_extension_unknown():
+    assert _guess_extension(b"\x00\x01\x02\x03") == ".bin"
+
+
+def test_is_image_ext():
+    assert _is_image_ext(".png") is True
+    assert _is_image_ext(".webp") is True
+    assert _is_image_ext(".ogg") is False
+
+
+def test_is_audio_ext():
+    assert _is_audio_ext(".ogg") is True
+    assert _is_audio_ext(".mp3") is True
+    assert _is_audio_ext(".pdf") is False
+
+
+# ---------------------------------------------------------------------------
+# 6. Correlation IDs
+# ---------------------------------------------------------------------------
+
+def test_corr_id_starts_with_prefix_and_tracks_pending():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    adapter = SimplexAdapter(cfg)
+    corr_id = adapter._make_corr_id()
+    assert corr_id.startswith(_CORR_PREFIX)
+    assert corr_id in adapter._pending_corr_ids
+
+
+def test_corr_id_pending_set_self_trims():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    adapter = SimplexAdapter(cfg)
+    adapter._max_pending_corr = 4
+    for _ in range(10):
+        adapter._make_corr_id()
+    # After many additions, the pending set should be bounded by the trim
+    # logic — at most one trim window above the cap.
+    assert len(adapter._pending_corr_ids) <= adapter._max_pending_corr + 1
+
+
+# ---------------------------------------------------------------------------
+# 7. Outbound send (mocked WS)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_send_dm():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    adapter = SimplexAdapter(cfg)
+
+    mock_ws = AsyncMock()
+    adapter._ws = mock_ws
+
+    result = await adapter.send("contact-42", "Hello, SimpleX!")
+    mock_ws.send.assert_called_once()
+    payload = json.loads(mock_ws.send.call_args[0][0])
+    assert payload["cmd"] == "@[contact-42] Hello, SimpleX!"
+    assert payload["corrId"].startswith(_CORR_PREFIX)
+    assert result.success is True
+
+
+@pytest.mark.asyncio
+async def test_send_group():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    adapter = SimplexAdapter(cfg)
+
+    mock_ws = AsyncMock()
+    adapter._ws = mock_ws
+
+    result = await adapter.send("group:grp-99", "Hello, group!")
+    payload = json.loads(mock_ws.send.call_args[0][0])
+    assert payload["cmd"] == "#[grp-99] Hello, group!"
+    assert result.success is True
+
+
+@pytest.mark.asyncio
+async def test_send_when_ws_not_connected_does_not_crash():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    adapter = SimplexAdapter(cfg)
+    # No _ws assigned — _send_ws should drop quietly
+    result = await adapter.send("contact-42", "hi")
+    assert result.success is True  # send() always returns success — fire-and-forget
+
+
+# ---------------------------------------------------------------------------
+# 8. Inbound: filter own-echo by corrId prefix
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_handle_event_filters_own_corr_id():
+    from gateway.config import PlatformConfig
+    cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"})
+    adapter = SimplexAdapter(cfg)
+    # Pretend we sent a command with this corrId
+    own = adapter._make_corr_id()
+    handler_mock = AsyncMock()
+    adapter._handle_new_chat_item = handler_mock  # type: ignore
+
+    await adapter._handle_event({"corrId": own, "type": "newChatItem"})
+    handler_mock.assert_not_called()
+    assert own not in adapter._pending_corr_ids  # discarded
+
+
+# ---------------------------------------------------------------------------
+# 9. Standalone (out-of-process) send for cron
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_standalone_send_missing_websockets(monkeypatch):
+    """When websockets is unimportable, return a clean error dict.
+
+    Implementation detail: the standalone path does ``import websockets``
+    inside the function body. We simulate the package being absent by
+    pulling it out of ``sys.modules`` and pointing the finder at None.
+    """
+    import sys
+    saved_websockets = sys.modules.pop("websockets", None)
+    saved_meta = list(sys.meta_path)
+
+    class _Blocker:
+        @staticmethod
+        def find_spec(name, path=None, target=None):
+            if name == "websockets" or name.startswith("websockets."):
+                raise ImportError("websockets blocked for test")
+            return None
+
+    sys.meta_path.insert(0, _Blocker())
+    try:
+        pconfig = MagicMock()
+        pconfig.extra = {"ws_url": "ws://localhost:5225"}
+        result = await _standalone_send(pconfig, "contact-42", "hi")
+        assert isinstance(result, dict)
+        assert "error" in result
+        assert "websockets" in result["error"]
+    finally:
+        sys.meta_path[:] = saved_meta
+        if saved_websockets is not None:
+            sys.modules["websockets"] = saved_websockets
+
+
+@pytest.mark.asyncio
+async def test_standalone_send_missing_url(monkeypatch):
+    monkeypatch.delenv("SIMPLEX_WS_URL", raising=False)
+    pconfig = MagicMock()
+    pconfig.extra = {}
+    # We expect the URL fallback (extra+env both empty) to be empty string,
+    # producing an error. We also need websockets to be importable for the
+    # url-check branch to be reached, so skip when it's not.
+    try:
+        import websockets.client  # noqa: F401
+    except ImportError:
+        pytest.skip("websockets not installed")
+
+    result = await _standalone_send(pconfig, "contact-42", "hi")
+    assert isinstance(result, dict)
+    # Either error about URL or a connection attempt failure — both are valid
+    # signals that the standalone path requires configuration.
+    assert "error" in result
+
+
+# ---------------------------------------------------------------------------
+# 10. register() — plugin-side metadata
+# ---------------------------------------------------------------------------
+
+def test_register_calls_register_platform():
+    ctx = MagicMock()
+    register(ctx)
+    ctx.register_platform.assert_called_once()
+    kwargs = ctx.register_platform.call_args.kwargs
+    assert kwargs["name"] == "simplex"
+    assert kwargs["label"] == "SimpleX Chat"
+    assert kwargs["required_env"] == ["SIMPLEX_WS_URL"]
+    assert kwargs["allowed_users_env"] == "SIMPLEX_ALLOWED_USERS"
+    assert kwargs["allow_all_env"] == "SIMPLEX_ALLOW_ALL_USERS"
+    assert kwargs["cron_deliver_env_var"] == "SIMPLEX_HOME_CHANNEL"
+    assert callable(kwargs["check_fn"])
+    assert callable(kwargs["validate_config"])
+    assert callable(kwargs["is_connected"])
+    assert callable(kwargs["env_enablement_fn"])
+    assert callable(kwargs["standalone_sender_fn"])
+    assert callable(kwargs["adapter_factory"])
+    assert callable(kwargs["setup_fn"])
+    # SimpleX uses opaque IDs only — no PII to redact.
+    assert kwargs["pii_safe"] is True
diff --git a/website/docs/user-guide/messaging/simplex.md b/website/docs/user-guide/messaging/simplex.md
new file mode 100644
index 00000000000..60853acd9f8
--- /dev/null
+++ b/website/docs/user-guide/messaging/simplex.md
@@ -0,0 +1,99 @@
+# SimpleX Chat
+
+[SimpleX Chat](https://simplex.chat/) is a private, decentralised messaging platform where users own their contacts and groups. Unlike other platforms, SimpleX assigns no persistent user IDs — every contact is identified by an opaque internal ID generated at connection time, which makes it one of the most private messengers available.
+
+## Prerequisites
+
+- The **simplex-chat** CLI installed and running as a daemon
+- Python package **websockets** (`pip install websockets`)
+
+## Install simplex-chat
+
+Download the latest release from the [simplex-chat GitHub releases](https://github.com/simplex-chat/simplex-chat/releases) page, or via Docker:
+
+```bash
+# Linux / macOS binary
+curl -L https://github.com/simplex-chat/simplex-chat/releases/latest/download/simplex-chat-ubuntu-22_04-x86-64 -o simplex-chat
+chmod +x simplex-chat
+
+# Or Docker
+docker run -p 5225:5225 simplexchat/simplex-chat -p 5225
+```
+
+## Start the daemon
+
+```bash
+simplex-chat -p 5225
+```
+
+The daemon listens on WebSocket at `ws://127.0.0.1:5225` by default.
+
+## Configure Hermes
+
+### Via setup wizard
+
+```bash
+hermes setup gateway
+```
+
+Select **SimpleX Chat** and follow the prompts.
+
+### Via environment variables
+
+Add these to `~/.hermes/.env`:
+
+```
+SIMPLEX_WS_URL=ws://127.0.0.1:5225
+SIMPLEX_ALLOWED_USERS=<contact-id-1>,<contact-id-2>
+SIMPLEX_HOME_CHANNEL=<contact-id>
+```
+
+| Variable | Required | Description |
+|---|---|---|
+| `SIMPLEX_WS_URL` | Yes | WebSocket URL of the simplex-chat daemon |
+| `SIMPLEX_ALLOWED_USERS` | Recommended | Comma-separated contact IDs allowed to use the agent |
+| `SIMPLEX_ALLOW_ALL_USERS` | Optional | Set `true` to allow every contact (use carefully) |
+| `SIMPLEX_HOME_CHANNEL` | Optional | Default contact ID for cron job delivery |
+| `SIMPLEX_HOME_CHANNEL_NAME` | Optional | Human label for the home channel |
+
+## Find your contact ID
+
+After starting the daemon, open a conversation with your agent contact. The contact ID will appear in session logs or via `hermes send_message action=list`.
+
+## Authorization
+
+By default **all contacts are denied**. You must either:
+
+1. Set `SIMPLEX_ALLOWED_USERS` to a comma-separated list of contact IDs, or
+2. Use **DM pairing** — send any message to the bot and it will reply with a pairing code. Enter that code via `hermes gateway pair`.
+
+## Using SimpleX with cron jobs
+
+```python
+cronjob(
+    action="create",
+    schedule="every 1h",
+    deliver="simplex",          # uses SIMPLEX_HOME_CHANNEL
+    prompt="Check for alerts and summarise."
+)
+```
+
+Or target a specific contact:
+
+```python
+send_message(target="simplex:<contact-id>", message="Done!")
+```
+
+## Privacy notes
+
+- SimpleX never reveals phone numbers or email addresses — contacts use opaque IDs
+- The connection between Hermes and the daemon is local WebSocket (`ws://127.0.0.1:5225`) — no data leaves your machine
+- Messages are end-to-end encrypted by the SimpleX protocol before reaching the daemon
+
+## Troubleshooting
+
+**"Cannot reach daemon"** — Ensure `simplex-chat -p 5225` is running and the port matches `SIMPLEX_WS_URL`.
+
+**"websockets not installed"** — Run `pip install websockets`.
+
+**Messages not received** — Check that the contact's ID is in `SIMPLEX_ALLOWED_USERS` or approve them via DM pairing.

From 47614dbfca86afd9e6cf29dbd8aa4effda0932c9 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:33:59 -0700
Subject: [PATCH 023/218] chore: wire simplex docs into sidebar + AUTHOR_MAP

- Adds plugins/platforms/simplex docs page to the messaging sidebar
  between LINE and Open WebUI.
- Maps louismichalot@hotmail.com -> Mibayy in scripts/release.py so the
  attribution check on the salvage PR passes.
---
 scripts/release.py  | 1 +
 website/sidebars.ts | 1 +
 2 files changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index b0e1fda9686..7d761d4aa80 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -398,6 +398,7 @@ AUTHOR_MAP = {
     "Mibayy@users.noreply.github.com": "Mibayy",
     "mibayy@users.noreply.github.com": "Mibayy",
     "mibay@clawhub.io": "Mibayy",
+    "louismichalot@hotmail.com": "Mibayy",
     "135070653+sgaofen@users.noreply.github.com": "sgaofen",
     "lzy.dev@gmail.com": "zhiyanliu",
     "me@janstepanovsky.cz": "hhhonzik",
diff --git a/website/sidebars.ts b/website/sidebars.ts
index a2977c87eef..a8d893d6e72 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -145,6 +145,7 @@ const sidebars: SidebarsConfig = {
         'user-guide/messaging/teams-meetings',
         'user-guide/messaging/msgraph-webhook',
         'user-guide/messaging/line',
+        'user-guide/messaging/simplex',
         'user-guide/messaging/open-webui',
         'user-guide/messaging/webhooks',
       ],

From b6e07417c5242f7a3d6af1c8d8f0173248b4253f Mon Sep 17 00:00:00 2001
From: Mibayy <Mibayy@users.noreply.github.com>
Date: Fri, 15 May 2026 01:39:13 -0700
Subject: [PATCH 024/218] feat(cli): show YOLO mode warning in banner and
 status bar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When running with --yolo, all dangerous command approvals are bypassed.
Make this state visible so users don't forget:

- Banner: '⚠ YOLO mode — all approval prompts bypassed' line in red, only
  shown when YOLO is active. Default case is silent (no extra line, no
  always-on 'restricted' label).
- Status bar: '⚠ YOLO' fragment appended in red (#FF4444 bold) across all
  three width tiers (<52, <76, ≥76) in both the plain-text fallback and
  the fragments builder.

Closes #2663

Co-authored-by: Mibayy <Mibayy@users.noreply.github.com>
---
 cli.py               | 22 ++++++++++++++++++++--
 hermes_cli/banner.py |  3 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/cli.py b/cli.py
index 527269aef7a..27286a3c988 100644
--- a/cli.py
+++ b/cli.py
@@ -3370,8 +3370,11 @@ class HermesCLI:
             percent_label = f"{percent}%" if percent is not None else "--"
             duration_label = snapshot["duration"]
 
+            yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
             if width < 52:
                 text = f"⚕ {snapshot['model_short']} · {duration_label}"
+                if yolo_active:
+                    text += " · ⚠ YOLO"
                 return self._trim_status_bar_text(text, width)
             if width < 76:
                 parts = [f"⚕ {snapshot['model_short']}", percent_label]
@@ -3379,6 +3382,8 @@ class HermesCLI:
                 if compressions:
                     parts.append(f"🗜️ {compressions}")
                 parts.append(duration_label)
+                if yolo_active:
+                    parts.append("⚠ YOLO")
                 return self._trim_status_bar_text(" · ".join(parts), width)
 
             if snapshot["context_length"]:
@@ -3396,6 +3401,8 @@ class HermesCLI:
             prompt_elapsed = snapshot.get("prompt_elapsed")
             if prompt_elapsed:
                 parts.append(prompt_elapsed)
+            if yolo_active:
+                parts.append("⚠ YOLO")
             return self._trim_status_bar_text(" │ ".join(parts), width)
         except Exception:
             return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}"
@@ -3412,6 +3419,7 @@ class HermesCLI:
             # line and produce duplicated status bar rows over long sessions.
             width = self._get_tui_terminal_width()
             duration_label = snapshot["duration"]
+            yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
 
             if width < 52:
                 frags = [
@@ -3419,8 +3427,11 @@ class HermesCLI:
                     ("class:status-bar-strong", snapshot["model_short"]),
                     ("class:status-bar-dim", " · "),
                     ("class:status-bar-dim", duration_label),
-                    ("class:status-bar", " "),
                 ]
+                if yolo_active:
+                    frags.append(("class:status-bar-dim", " · "))
+                    frags.append(("class:status-bar-yolo", "⚠ YOLO"))
+                frags.append(("class:status-bar", " "))
             else:
                 percent = snapshot["context_percent"]
                 percent_label = f"{percent}%" if percent is not None else "--"
@@ -3438,8 +3449,11 @@ class HermesCLI:
                     frags.extend([
                         ("class:status-bar-dim", " · "),
                         ("class:status-bar-dim", duration_label),
-                        ("class:status-bar", " "),
                     ])
+                    if yolo_active:
+                        frags.append(("class:status-bar-dim", " · "))
+                        frags.append(("class:status-bar-yolo", "⚠ YOLO"))
+                    frags.append(("class:status-bar", " "))
                 else:
                     if snapshot["context_length"]:
                         ctx_total = _format_context_length(snapshot["context_length"])
@@ -3472,6 +3486,9 @@ class HermesCLI:
                     if prompt_elapsed:
                         frags.append(("class:status-bar-dim", " │ "))
                         frags.append(("class:status-bar-dim", prompt_elapsed))
+                    if yolo_active:
+                        frags.append(("class:status-bar-dim", " │ "))
+                        frags.append(("class:status-bar-yolo", "⚠ YOLO"))
                     frags.append(("class:status-bar", " "))
 
             total_width = sum(self._status_bar_display_width(text) for _, text in frags)
@@ -13344,6 +13361,7 @@ class HermesCLI:
             'status-bar-warn': 'bg:#1a1a2e #FFD700 bold',
             'status-bar-bad': 'bg:#1a1a2e #FF8C00 bold',
             'status-bar-critical': 'bg:#1a1a2e #FF6B6B bold',
+            'status-bar-yolo': 'bg:#1a1a2e #FF4444 bold',
             # Bronze horizontal rules around the input area
             'input-rule': '#CD7F32',
             # Clipboard image attachment badges
diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index c4ec348ef48..036412ac072 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -470,6 +470,9 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
         model_short = model_short[:25] + "..."
     ctx_str = f" [dim {dim}]·[/] [dim {dim}]{_format_context_length(context_length)} context[/]" if context_length else ""
     left_lines.append(f"[{accent}]{model_short}[/]{ctx_str} [dim {dim}]·[/] [dim {dim}]Nous Research[/]")
+
+    if os.getenv("HERMES_YOLO_MODE"):
+        left_lines.append(f"[bold red]⚠ YOLO mode[/] [dim {dim}]— all approval prompts bypassed[/]")
     left_lines.append(f"[dim {dim}]{cwd}[/]")
     if session_id:
         left_lines.append(f"[dim {session_color}]Session: {session_id}[/]")

From 4f8aaf10465566008499e65937f659a29f1ba6ab Mon Sep 17 00:00:00 2001
From: InB4DevOps <tolle.lege+github@gmail.com>
Date: Fri, 15 May 2026 01:40:03 -0700
Subject: [PATCH 025/218] perf(run_agent): accumulate length-continuation
 prefix via list+join
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace O(n²) string concatenation of truncated_response_prefix in the
length-continuation retry loop with a list + ''.join(). Functionally
equivalent: same partial response on early return, same prepend on
final assembly. The legacy retry path is capped at 3 iterations, so
the practical wall-clock win is small, but the new idiom matches the
rest of the codebase and removes a needless repeated allocation.

Salvaged from PR #2717 (the run_conversation portion only — trajectory
refactor dropped because it silently rewrote </tool_response> to </think>).

Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
---
 run_agent.py                                         | 12 ++++++------
 .../test_anthropic_truncation_continuation.py        |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 325e1e13ef3..18ca03bd512 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -12207,7 +12207,7 @@ class AIAgent:
         codex_ack_continuations = 0
         length_continue_retries = 0
         truncated_tool_call_retries = 0
-        truncated_response_prefix = ""
+        truncated_response_parts: List[str] = []
         compression_attempts = 0
         _turn_exit_reason = "unknown"  # Diagnostic: why the loop ended
 
@@ -13100,7 +13100,7 @@ class AIAgent:
                                 interim_msg = self._build_assistant_message(assistant_message, finish_reason)
                                 messages.append(interim_msg)
                                 if assistant_message.content:
-                                    truncated_response_prefix += assistant_message.content
+                                    truncated_response_parts.append(assistant_message.content)
 
                                 if length_continue_retries < 3:
                                     self._vprint(
@@ -13121,7 +13121,7 @@ class AIAgent:
                                     restart_with_length_continuation = True
                                     break
 
-                                partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
+                                partial_response = self._strip_think_blocks("".join(truncated_response_parts)).strip()
                                 self._cleanup_task_resources(effective_task_id)
                                 self._persist_session(messages, conversation_history)
                                 return {
@@ -15325,9 +15325,9 @@ class AIAgent:
 
                     codex_ack_continuations = 0
 
-                    if truncated_response_prefix:
-                        final_response = truncated_response_prefix + final_response
-                        truncated_response_prefix = ""
+                    if truncated_response_parts:
+                        final_response = "".join(truncated_response_parts) + final_response
+                        truncated_response_parts = []
                         length_continue_retries = 0
                     
                     final_response = self._strip_think_blocks(final_response).strip()
diff --git a/tests/run_agent/test_anthropic_truncation_continuation.py b/tests/run_agent/test_anthropic_truncation_continuation.py
index b7a263f1649..872015bc0bc 100644
--- a/tests/run_agent/test_anthropic_truncation_continuation.py
+++ b/tests/run_agent/test_anthropic_truncation_continuation.py
@@ -59,7 +59,7 @@ class TestTruncatedAnthropicResponseNormalization:
         nr = get_transport("anthropic_messages").normalize_response(response)
 
         # The continuation block checks these two attributes:
-        #   assistant_message.content  → appended to truncated_response_prefix
+        #   assistant_message.content  → appended to truncated_response_parts
         #   assistant_message.tool_calls → guards the text-retry branch
         assert nr.content is not None
         assert "partial response" in nr.content

From 647cc0bb0db4328b941008b290dcb986cdd18c54 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:40:07 -0700
Subject: [PATCH 026/218] chore(release): add AUTHOR_MAP entries for InB4DevOps

---
 scripts/release.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 7d761d4aa80..8d2c6c16990 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -89,6 +89,8 @@ AUTHOR_MAP = {
     "zhanganzhe@tenclass.com": "luoyuctl",
     "51604064+luoyuctl@users.noreply.github.com": "luoyuctl",
     "127238744+teknium1@users.noreply.github.com": "teknium1",
+    "tolle.lege+github@gmail.com": "InB4DevOps",
+    "73686890+InB4DevOps@users.noreply.github.com": "InB4DevOps",
     "147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0",
     "97489706+purzbeats@users.noreply.github.com": "purzbeats",
     "hugosequier@gmail.com": "Hugo-SEQUIER",

From 5360b542447daaf0ba8d0f7c3cf0be1751ca0008 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:38:30 -0700
Subject: [PATCH 027/218] fix(providers): set User-Agent on
 ProviderProfile.fetch_models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some catalog endpoints (OpenCode Zen, etc.) sit behind a WAF that
returns 403 for the default Python-urllib/<ver> User-Agent.  The
generic profile-based live fetch in providers/base.py was silently
failing for any such provider — falling through to the static catalog
and missing newly-launched models.

Set a generic 'hermes-cli/<version>' UA on the catalog probe so every
api_key provider profile benefits.  Verified live against opencode-zen:
before this change, profile.fetch_models() raised HTTP 403; after, it
returns 42 models including gpt-5.5, gpt-5.5-pro, kimi-k2.6, glm-5.1
and the *-free variants the static catalog doesn't list.

Also strip the now-stale comment in validate_requested_model() claiming
opencode-zen's /models returns 404 against the HTML marketing site —
the API endpoint at /zen/v1/models returns 200 with valid JSON.

Surfaced by #2651 (@aashizpoudel) — fixes the same user-facing gap
their PR targeted, applied at the right layer so all api_key provider
profiles get live catalogs through the same code path.

Co-authored-by: Aashish Poudel <mr.aashiz@gmail.com>
---
 hermes_cli/models.py | 13 ++++++-------
 providers/base.py    | 18 ++++++++++++++++++
 scripts/release.py   |  2 ++
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 1ffede636a1..bc41132f5d5 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -3702,13 +3702,12 @@ def validate_requested_model(
 
     # Static-catalog fallback: when the /models probe was unreachable,
     # validate against the curated list from provider_model_ids() — same
-    # pattern as the openai-codex and minimax branches above.  This fixes
-    # /model switches in the gateway for providers like opencode-go and
-    # opencode-zen whose /models endpoint returns 404 against the HTML
-    # marketing site.  Without this block, validate_requested_model would
-    # reject every model on such providers, switch_model() would return
-    # success=False, and the gateway would never write to
-    # _session_model_overrides.
+    # pattern as the openai-codex and minimax branches above.  This keeps
+    # /model switches working in the gateway for providers whose /models
+    # endpoint is temporarily unreachable or returns a non-JSON payload.
+    # Without this block, validate_requested_model would reject every model
+    # on such providers, switch_model() would return success=False, and
+    # the gateway would never write to _session_model_overrides.
     provider_label = _PROVIDER_LABELS.get(normalized, normalized)
     try:
         catalog_models = provider_model_ids(normalized)
diff --git a/providers/base.py b/providers/base.py
index a9e76823bb2..fa6765d103c 100644
--- a/providers/base.py
+++ b/providers/base.py
@@ -21,6 +21,20 @@ logger = logging.getLogger(__name__)
 OMIT_TEMPERATURE = object()
 
 
+def _profile_user_agent() -> str:
+    """Return a ``hermes-cli/<version>`` UA string, with a stable fallback.
+
+    Used by ``ProviderProfile.fetch_models`` so the catalog probe is not
+    served the default ``Python-urllib/<ver>`` UA — some providers
+    (OpenCode Zen, etc.) sit behind a WAF that returns 403 for that.
+    """
+    try:
+        from hermes_cli import __version__ as _ver  # lazy: avoid layer cycle at import time
+        return f"hermes-cli/{_ver}"
+    except Exception:
+        return "hermes-cli"
+
+
 @dataclass
 class ProviderProfile:
     """Base provider profile — subclass or instantiate with overrides."""
@@ -153,6 +167,10 @@ class ProviderProfile:
         if api_key:
             req.add_header("Authorization", f"Bearer {api_key}")
         req.add_header("Accept", "application/json")
+        # Some providers (e.g. OpenCode Zen) sit behind a WAF that blocks
+        # the default ``Python-urllib/<ver>`` User-Agent.  Set a generic
+        # hermes-cli UA so the catalog endpoint is reachable.
+        req.add_header("User-Agent", _profile_user_agent())
         for k, v in self.default_headers.items():
             req.add_header(k, v)
 
diff --git a/scripts/release.py b/scripts/release.py
index 8d2c6c16990..21587212b02 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -58,6 +58,8 @@ AUTHOR_MAP = {
     "altriatree@gmail.com": "TruaShamu",
     "m@mobrienv.dev": "mikeyobrien",
     "qiyin.zuo@pcitc.com": "qiyin-code",
+    "mr.aashiz@gmail.com": "aashizpoudel",
+    "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
     "oleksii.lisikh@gmail.com": "olisikh",
     "jeremy@geocaching.com": "outdoorsea",
     "leone.parise@gmail.com": "leoneparise",

From 55f3262e788bdd7dd6adcab1d515d476b6cb9321 Mon Sep 17 00:00:00 2001
From: Animesh Mishra <amethystani@users.noreply.github.com>
Date: Tue, 24 Mar 2026 07:20:51 +0000
Subject: [PATCH 028/218] fix(mcp): pre-compile env-var regex and unify
 interpolation

Remove redundant inner `import re` and regex recompilation on every call in
_interpolate_env_vars. Add module-level _ENV_VAR_PATTERN compiled once.

Replace the separate _interpolate_value() in mcp_config.py (which used \w+
and would silently fail on env vars containing hyphens or dots) with the
shared _ENV_VAR_PATTERN from mcp_tool.py. Remove now-unused import re.
---
 hermes_cli/mcp_config.py | 10 ++--------
 tools/mcp_tool.py        |  7 ++++++-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/hermes_cli/mcp_config.py b/hermes_cli/mcp_config.py
index 8c12ad70758..ed9d7b5f6db 100644
--- a/hermes_cli/mcp_config.py
+++ b/hermes_cli/mcp_config.py
@@ -25,6 +25,7 @@ from hermes_cli.config import (
 )
 from hermes_cli.colors import Colors, color
 from hermes_constants import display_hermes_home
+from tools.mcp_tool import _ENV_VAR_PATTERN
 
 logger = logging.getLogger(__name__)
 
@@ -551,7 +552,7 @@ def cmd_mcp_test(args):
         for k, v in headers.items():
             if isinstance(v, str) and ("key" in k.lower() or "auth" in k.lower()):
                 # Mask the value
-                resolved = _interpolate_value(v)
+                resolved = _ENV_VAR_PATTERN.sub(lambda m: os.getenv(m.group(1), ""), v)
                 if len(resolved) > 8:
                     masked = resolved[:4] + "***" + resolved[-4:]
                 else:
@@ -581,13 +582,6 @@ def cmd_mcp_test(args):
     print()
 
 
-def _interpolate_value(value: str) -> str:
-    """Resolve ``${ENV_VAR}`` references in a string."""
-    def _replace(m):
-        return os.getenv(m.group(1), "")
-    return re.sub(r"\$\{(\w+)\}", _replace, value)
-
-
 # ─── hermes mcp login ────────────────────────────────────────────────────────
 
 def cmd_mcp_login(args):
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index ee1843043dc..c2668395e5d 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -279,6 +279,11 @@ _CREDENTIAL_PATTERN = re.compile(
     re.IGNORECASE,
 )
 
+# Pre-compiled pattern for ${VAR_NAME} style env-var interpolation.
+# Supports any non-} characters in the variable name (hyphens, dots, etc.)
+# so providers like MY-VAR or my.var work correctly.
+_ENV_VAR_PATTERN = re.compile(r"\$\{([^}]+)\}")
+
 
 # ---------------------------------------------------------------------------
 # Security helpers
@@ -2104,7 +2109,7 @@ def _interpolate_env_vars(value):
     if isinstance(value, str):
         def _replace(m):
             return os.environ.get(m.group(1), m.group(0))
-        return re.sub(r"\$\{([^}]+)\}", _replace, value)
+        return _ENV_VAR_PATTERN.sub(_replace, value)
     if isinstance(value, dict):
         return {k: _interpolate_env_vars(v) for k, v in value.items()}
     if isinstance(value, list):

From 59c7cc64f0265195fa15a400411f381dd20b8b4e Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:42:35 -0700
Subject: [PATCH 029/218] chore(release): add AUTHOR_MAP entry for amethystani

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 21587212b02..38392742d43 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -115,6 +115,7 @@ AUTHOR_MAP = {
     "oswaldb22@users.noreply.github.com": "oswaldb22",
     "abdielv@proton.me": "AJV20",
     "mason@growagainorchids.com": "masonjames",
+    "108541149+amethystani@users.noreply.github.com": "amethystani",
     "ytchen0719@gmail.com": "liquidchen",
     "am@studio1.tailb672fe.ts.net": "subtract0",
     "mike@grossmann.at": "ReqX",

From c4a21d783131b04da443be6b624e20bb3b5b87b7 Mon Sep 17 00:00:00 2001
From: nidhi-singh02 <nidhi2894@gmail.com>
Date: Tue, 24 Mar 2026 14:42:16 +0530
Subject: [PATCH 030/218] fix(cli): log swallowed exception in runtime model
 auto-detection

Replaces bare `except Exception: pass` with debug-level logging
so failures in local endpoint model discovery are diagnosable
instead of silently hidden.
---
 hermes_cli/runtime_provider.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 4ac21ea4568..d7c30fe5648 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -102,8 +102,10 @@ def _auto_detect_local_model(base_url: str) -> str:
                 model_id = models[0].get("id", "")
                 if model_id:
                     return model_id
-    except Exception:
-        pass
+    except Exception as exc:
+        # Log instead of silently swallowing — aids debugging when
+        # local model auto-detection fails unexpectedly.
+        logger.debug("Auto-detect model from %s failed: %s", base_url, exc)
     return ""
 
 
From 5301cc212bb72b634fcb4da7bf4380c43d4b3dca Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:46:46 -0700
Subject: [PATCH 031/218] chore(release): add AUTHOR_MAP entry for
 nidhi-singh02

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 38392742d43..7606d058677 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -59,6 +59,7 @@ AUTHOR_MAP = {
     "m@mobrienv.dev": "mikeyobrien",
     "qiyin.zuo@pcitc.com": "qiyin-code",
     "mr.aashiz@gmail.com": "aashizpoudel",
+    "nidhi2894@gmail.com": "nidhi-singh02",
     "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
     "oleksii.lisikh@gmail.com": "olisikh",
     "jeremy@geocaching.com": "outdoorsea",

From eacb398f755b6ee102e75c6d62aed5a9b253e29d Mon Sep 17 00:00:00 2001
From: Nidhi Singh <nidhi2894@gmail.com>
Date: Fri, 15 May 2026 01:49:35 -0700
Subject: [PATCH 032/218] fix(tools): add return_exceptions to asyncio.gather
 in web_tools

Three asyncio.gather() calls in tools/web_tools.py ran without
return_exceptions=True. A single failing task (e.g. LLM rate limit on
one URL) would raise out of gather() and discard every other
successfully fetched/summarized result.

Pass return_exceptions=True and filter BaseException entries with a
warning log before unpacking. Affects:

- chunk summarization gather (large web_extract pages)
- firecrawl per-result LLM post-processing
- tavily crawl per-result LLM post-processing

Closes #2744
---
 tools/web_tools.py | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/tools/web_tools.py b/tools/web_tools.py
index e2743248d22..597edb0c8fd 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -586,11 +586,20 @@ async def _process_large_content_chunked(
     
     # Run all chunk summarizations in parallel
     tasks = [summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)]
-    results = await asyncio.gather(*tasks)
-    
-    # Collect successful summaries in order
+    # Use return_exceptions=True so a single task failure does not discard
+    # all other successfully summarized chunks.
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    # Filter out exceptions, then collect successful summaries in order
+    successful_results = []
+    for result_item in results:
+        if isinstance(result_item, BaseException):
+            logger.warning("Chunk summarization task failed: %s", result_item)
+            continue
+        successful_results.append(result_item)
+
     summaries = []
-    for chunk_idx, summary in sorted(results, key=lambda x: x[0]):
+    for chunk_idx, summary in sorted(successful_results, key=lambda x: x[0]):
         if summary:
             summaries.append(f"## Section {chunk_idx + 1}\n{summary}")
     
@@ -1038,10 +1047,16 @@ async def web_extract_tool(
             # Run all LLM processing in parallel
             results_list = response.get('results', [])
             tasks = [process_single_result(result) for result in results_list]
-            processed_results = await asyncio.gather(*tasks)
-            
+            # Use return_exceptions=True so a single task failure does not
+            # discard all other successfully processed results.
+            processed_results = await asyncio.gather(*tasks, return_exceptions=True)
+
             # Collect metrics and print results
-            for result, metrics, status in processed_results:
+            for result_item in processed_results:
+                if isinstance(result_item, BaseException):
+                    logger.warning("Web result processing task failed: %s", result_item)
+                    continue
+                result, metrics, status = result_item
                 url = result.get('url', 'Unknown URL')
                 if status == "processed":
                     debug_call_data["compression_metrics"].append(metrics)
@@ -1285,8 +1300,14 @@ async def web_crawl_tool(
                     return result, metrics, "too_short"
 
                 tasks = [_process_tavily_crawl(r) for r in response.get('results', [])]
-                processed_results = await asyncio.gather(*tasks)
-                for result, metrics, status in processed_results:
+                # Use return_exceptions=True so a single task failure does not
+                # discard all other successfully processed crawl results.
+                processed_results = await asyncio.gather(*tasks, return_exceptions=True)
+                for result_item in processed_results:
+                    if isinstance(result_item, BaseException):
+                        logger.warning("Tavily crawl processing task failed: %s", result_item)
+                        continue
+                    result, metrics, status = result_item
                     if status == "processed":
                         debug_call_data["compression_metrics"].append(metrics)
                         debug_call_data["pages_processed_with_llm"] += 1

From 94bdc63ff5f5329e5f2ab0ea213c07e3a7643aff Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 01:49:40 -0700
Subject: [PATCH 033/218] chore(release): add AUTHOR_MAP entry for
 nidhi-singh02

PR #2751 salvage. CI requires AUTHOR_MAP coverage for all
contributor commit emails.
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 7606d058677..4a91762ebeb 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -223,6 +223,7 @@ AUTHOR_MAP = {
     "74749461+yuga-hashimoto@users.noreply.github.com": "yuga-hashimoto",
     "xiangyong@zspace.cn": "CES4751",
     "harish.kukreja@gmail.com": "counterposition",
+    "nidhi2894@gmail.com": "nidhi-singh02",
     "35294173+Fearvox@users.noreply.github.com": "Fearvox",
     "hypnus.yuan@gmail.com": "Hypnus-Yuan",
     "15558128926@qq.com": "xsfX20",

From 837395685099b130a502db3ec25551475fe3c7cc Mon Sep 17 00:00:00 2001
From: nidhi-singh02 <nidhi2894@gmail.com>
Date: Fri, 15 May 2026 01:49:56 -0700
Subject: [PATCH 034/218] fix(slack): guard split()[0] against whitespace-only
 command text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a user sends a Slack message like '/hermes   ' (trailing whitespace
after the slash) the legacy subcommand router hit `text.split()[0]` with
a truthy-but-whitespace-only `text`. `'   '.split()` returns `[]` →
IndexError, blowing up the slash handler before fallthrough to `/help`.

Switch to a two-step guard that materializes the parts list first and
indexes only if non-empty.

Salvaged from PR #2752 by @nidhi-singh02. The PR's other two hunks
(`tools/file_operations.py`, `agent/anthropic_adapter.py`) are
unreachable in current code — `LINTERS` is a hardcoded constant dict
with no empty values, and the anthropic version-detection site is
already guarded by a `result.stdout.strip()` truthy check — so only the
slack hunk is taken.

Closes #2745

Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
---
 gateway/platforms/slack.py | 5 ++++-
 scripts/release.py         | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py
index ca34ab4acac..2116b569f96 100644
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@@ -2785,7 +2785,10 @@ class SlackAdapter(BasePlatformAdapter):
             from hermes_cli.commands import slack_subcommand_map
             subcommand_map = slack_subcommand_map()
             subcommand_map["compact"] = "/compress"
-            first_word = text.split()[0] if text else ""
+            # Guard against whitespace-only text where ``text`` is truthy but
+            # ``text.split()`` returns ``[]`` (e.g. user sends ``/hermes   ``).
+            parts = text.split() if text else []
+            first_word = parts[0] if parts else ""
             if first_word in subcommand_map:
                 rest = text[len(first_word):].strip()
                 text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
diff --git a/scripts/release.py b/scripts/release.py
index 4a91762ebeb..8a6f30802be 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1071,6 +1071,8 @@ AUTHOR_MAP = {
     "37467487+yifengingit@users.noreply.github.com": "yifengingit",  # PR #25589 salvage (AUTOINCREMENT id ordering)
     "89525629+vanthinh6886@users.noreply.github.com": "vanthinh6886",  # PR #25562 salvage (.env 0600 perms)
     "16034932+Arkmusn@users.noreply.github.com": "Arkmusn",  # PR #25559 salvage (approvals.timeout from config)
+    "nidhi2894@gmail.com": "nidhi-singh02",  # PR #2752 salvage (slack whitespace-only IndexError guard)
+    "38173192+nidhi-singh02@users.noreply.github.com": "nidhi-singh02",
 }
 
 
From 6af99423272ed67dd1f8d88bfdf762d4e5b77a2f Mon Sep 17 00:00:00 2001
From: aydnOktay <xaydinoktay@gmail.com>
Date: Tue, 24 Mar 2026 13:45:33 +0300
Subject: [PATCH 035/218] fix(url-safety): allow only http and https schemes

---
 tests/tools/test_url_safety.py | 8 ++++++++
 tools/url_safety.py            | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/tests/tools/test_url_safety.py b/tests/tools/test_url_safety.py
index 38d27d40af3..5a0cceb2880 100644
--- a/tests/tools/test_url_safety.py
+++ b/tests/tools/test_url_safety.py
@@ -22,6 +22,14 @@ class TestIsSafeUrl:
         ]):
             assert is_safe_url("https://example.com/image.png") is True
 
+    def test_ftp_scheme_blocked(self):
+        """Only http/https should be allowed for fetch tools."""
+        assert is_safe_url("ftp://example.com/file.txt") is False
+
+    def test_missing_scheme_blocked(self):
+        """Bare host/path should be rejected to avoid ambiguous handling."""
+        assert is_safe_url("example.com/path") is False
+
     def test_localhost_blocked(self):
         with patch("socket.getaddrinfo", return_value=[
             (2, 1, 6, "", ("127.0.0.1", 0)),
diff --git a/tools/url_safety.py b/tools/url_safety.py
index 743510b2757..0f3dd597e49 100644
--- a/tools/url_safety.py
+++ b/tools/url_safety.py
@@ -263,6 +263,9 @@ def is_safe_url(url: str) -> bool:
         parsed = urlparse(url)
         hostname = (parsed.hostname or "").strip().lower().rstrip(".")
         scheme = (parsed.scheme or "").strip().lower()
+        if scheme not in {"http", "https"}:
+            logger.warning("Blocked request — unsupported URL scheme: %s", scheme or "<empty>")
+            return False
         if not hostname:
             return False
 

From 13c72fb486e6bfc047bfde93e54116ea7ef7adf4 Mon Sep 17 00:00:00 2001
From: nidhi-singh02 <nidhi2894@gmail.com>
Date: Fri, 15 May 2026 01:51:41 -0700
Subject: [PATCH 036/218] fix(tools): wrap browser provider network calls with
 error handling

Wrap requests.post() in create_session() for browser_use, browserbase,
and firecrawl providers with requests.RequestException handling.
Connection timeouts and DNS resolution failures now surface as clean
RuntimeError messages instead of raw requests exception tracebacks.

Browser Use managed-gateway mode preserves raw exception propagation
so the existing idempotency-key retry semantics keep working.

Closes #2746

Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com>
---
 tools/browser_providers/browser_use.py | 22 ++++++--
 tools/browser_providers/browserbase.py | 77 ++++++++++++++------------
 tools/browser_providers/firecrawl.py   | 17 ++++--
 3 files changed, 68 insertions(+), 48 deletions(-)

diff --git a/tools/browser_providers/browser_use.py b/tools/browser_providers/browser_use.py
index 260249ef0bb..a1f4f425ba0 100644
--- a/tools/browser_providers/browser_use.py
+++ b/tools/browser_providers/browser_use.py
@@ -137,12 +137,22 @@ class BrowserUseProvider(CloudBrowserProvider):
             else {}
         )
 
-        response = requests.post(
-            f"{config['base_url']}/browsers",
-            headers=headers,
-            json=payload,
-            timeout=30,
-        )
+        try:
+            response = requests.post(
+                f"{config['base_url']}/browsers",
+                headers=headers,
+                json=payload,
+                timeout=30,
+            )
+        except requests.RequestException as exc:
+            # Managed mode: propagate raw so callers can retry with the
+            # preserved idempotency key. Direct mode: wrap network failures
+            # into a clean RuntimeError for end users.
+            if managed_mode:
+                raise
+            raise RuntimeError(
+                f"Browser Use API connection failed: {exc}"
+            ) from exc
 
         if not response.ok:
             if managed_mode and not _should_preserve_pending_create_key(response):
diff --git a/tools/browser_providers/browserbase.py b/tools/browser_providers/browserbase.py
index 5076af4c7a6..4807345214b 100644
--- a/tools/browser_providers/browserbase.py
+++ b/tools/browser_providers/browserbase.py
@@ -92,45 +92,50 @@ class BrowserbaseProvider(CloudBrowserProvider):
             "X-BB-API-Key": config["api_key"],
         }
 
-        response = requests.post(
-            f"{config['base_url']}/v1/sessions",
-            headers=headers,
-            json=session_config,
-            timeout=30,
-        )
+        try:
+            response = requests.post(
+                f"{config['base_url']}/v1/sessions",
+                headers=headers,
+                json=session_config,
+                timeout=30,
+            )
 
-        proxies_fallback = False
-        keepalive_fallback = False
+            proxies_fallback = False
+            keepalive_fallback = False
 
-        # Handle 402 — paid features unavailable
-        if response.status_code == 402:
-            if enable_keep_alive:
-                keepalive_fallback = True
-                logger.warning(
-                    "keepAlive may require paid plan (402), retrying without it. "
-                    "Sessions may timeout during long operations."
-                )
-                session_config.pop("keepAlive", None)
-                response = requests.post(
-                    f"{config['base_url']}/v1/sessions",
-                    headers=headers,
-                    json=session_config,
-                    timeout=30,
-                )
+            # Handle 402 — paid features unavailable
+            if response.status_code == 402:
+                if enable_keep_alive:
+                    keepalive_fallback = True
+                    logger.warning(
+                        "keepAlive may require paid plan (402), retrying without it. "
+                        "Sessions may timeout during long operations."
+                    )
+                    session_config.pop("keepAlive", None)
+                    response = requests.post(
+                        f"{config['base_url']}/v1/sessions",
+                        headers=headers,
+                        json=session_config,
+                        timeout=30,
+                    )
 
-            if response.status_code == 402 and enable_proxies:
-                proxies_fallback = True
-                logger.warning(
-                    "Proxies unavailable (402), retrying without proxies. "
-                    "Bot detection may be less effective."
-                )
-                session_config.pop("proxies", None)
-                response = requests.post(
-                    f"{config['base_url']}/v1/sessions",
-                    headers=headers,
-                    json=session_config,
-                    timeout=30,
-                )
+                if response.status_code == 402 and enable_proxies:
+                    proxies_fallback = True
+                    logger.warning(
+                        "Proxies unavailable (402), retrying without proxies. "
+                        "Bot detection may be less effective."
+                    )
+                    session_config.pop("proxies", None)
+                    response = requests.post(
+                        f"{config['base_url']}/v1/sessions",
+                        headers=headers,
+                        json=session_config,
+                        timeout=30,
+                    )
+        except requests.RequestException as exc:
+            raise RuntimeError(
+                f"Browserbase API connection failed: {exc}"
+            ) from exc
 
         if not response.ok:
             raise RuntimeError(
diff --git a/tools/browser_providers/firecrawl.py b/tools/browser_providers/firecrawl.py
index 17001f72f1d..4a8ae82a2d2 100644
--- a/tools/browser_providers/firecrawl.py
+++ b/tools/browser_providers/firecrawl.py
@@ -47,12 +47,17 @@ class FirecrawlProvider(CloudBrowserProvider):
 
         body: Dict[str, object] = {"ttl": ttl}
 
-        response = requests.post(
-            f"{self._api_url()}/v2/browser",
-            headers=self._headers(),
-            json=body,
-            timeout=30,
-        )
+        try:
+            response = requests.post(
+                f"{self._api_url()}/v2/browser",
+                headers=self._headers(),
+                json=body,
+                timeout=30,
+            )
+        except requests.RequestException as exc:
+            raise RuntimeError(
+                f"Firecrawl API connection failed: {exc}"
+            ) from exc
 
         if not response.ok:
             raise RuntimeError(

From 274217316e65bd7d4030b105548de30747526ec9 Mon Sep 17 00:00:00 2001
From: Steve Kelly <stevekelly622@gmail.com>
Date: Thu, 14 May 2026 13:19:59 -0400
Subject: [PATCH 037/218] fix(codex-runtime): keep migrated root keys top-level

---
 hermes_cli/codex_runtime_plugin_migration.py  | 40 +++++++++++++++----
 .../test_codex_runtime_plugin_migration.py    | 23 ++++++++++-
 2 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/hermes_cli/codex_runtime_plugin_migration.py b/hermes_cli/codex_runtime_plugin_migration.py
index dd7faa09794..49b4905d5b2 100644
--- a/hermes_cli/codex_runtime_plugin_migration.py
+++ b/hermes_cli/codex_runtime_plugin_migration.py
@@ -304,6 +304,37 @@ def render_codex_toml_section(
     return "\n".join(out) + "\n"
 
 
+def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> str:
+    """Insert Hermes' managed Codex TOML block while keeping root keys root-scoped.
+
+    TOML has no syntax to return to the document root after a table header.
+    Therefore appending a root key like `default_permissions = ...` after a
+    user table such as `[features]` actually creates `features.default_permissions`,
+    which Codex rejects. Insert the managed block before the first table header
+    so its root keys remain top-level, while preserving user content verbatim.
+    """
+    if not user_text.strip():
+        return managed_block
+
+    lines = user_text.splitlines(keepends=True)
+    first_table_idx: Optional[int] = None
+    for idx, line in enumerate(lines):
+        stripped = line.lstrip()
+        if stripped.startswith("["):
+            first_table_idx = idx
+            break
+
+    if first_table_idx is None:
+        prefix = user_text.rstrip("\n")
+        return f"{prefix}\n\n{managed_block}" if prefix else managed_block
+
+    prefix = "".join(lines[:first_table_idx]).rstrip("\n")
+    suffix = "".join(lines[first_table_idx:]).lstrip("\n")
+    if prefix:
+        return f"{prefix}\n\n{managed_block}\n{suffix}"
+    return f"{managed_block}\n{suffix}"
+
+
 def _strip_existing_managed_block(toml_text: str) -> str:
     """Remove any prior managed section so re-runs idempotently replace it.
 
@@ -571,14 +602,7 @@ def migrate(
             report.errors.append(f"could not read {target}: {exc}")
             return report
         without_managed = _strip_existing_managed_block(existing)
-        # Ensure exactly one blank line between user content and managed block
-        if without_managed and not without_managed.endswith("\n"):
-            without_managed += "\n"
-        new_text = (
-            without_managed.rstrip("\n") + "\n\n" + managed_block
-            if without_managed.strip()
-            else managed_block
-        )
+        new_text = _insert_managed_block_at_top_level(without_managed, managed_block)
     else:
         new_text = managed_block
 
diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
index b2e27f8c97b..c283a668681 100644
--- a/tests/hermes_cli/test_codex_runtime_plugin_migration.py
+++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
@@ -567,10 +567,31 @@ class TestMigrate:
         assert "[model]" in new_text
         assert 'profile = "default"' in new_text
         assert "[providers.openai]" in new_text
-        # And new MCP block appended
+        # And new MCP block inserted without breaking user tables
         assert "[mcp_servers.a]" in new_text
         assert MIGRATION_MARKER in new_text
 
+    def test_managed_root_keys_stay_top_level_when_config_ends_in_table(self, tmp_path):
+        """TOML has no explicit 'leave current table' syntax. If Hermes appends
+        root keys like default_permissions after a user table such as [features],
+        Codex parses them as features.default_permissions and rejects the config.
+        The managed block must therefore be inserted before the first table."""
+        import tomllib
+
+        target = tmp_path / "config.toml"
+        target.write_text(
+            'model = "gpt-5.5"\n'
+            "\n"
+            "[features]\n"
+            "terminal_resize_reflow = true\n"
+        )
+        migrate({}, codex_home=tmp_path, discover_plugins=False, expose_hermes_tools=False)
+        new_text = target.read_text()
+        parsed = tomllib.loads(new_text)
+        assert parsed["default_permissions"] == ":workspace"
+        assert "default_permissions" not in parsed["features"]
+        assert new_text.index(MIGRATION_MARKER) < new_text.index("[features]")
+
     def test_preserves_user_mcp_server_outside_managed_block(self, tmp_path):
         """Quirk #6: when a user adds their own MCP server entry directly
         to ~/.codex/config.toml outside Hermes' managed block, re-running

From 77276070f5a1302908456734f2a5bdfe790260de Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 15 May 2026 14:45:31 +0530
Subject: [PATCH 038/218] fix(codex-runtime): de-dup [plugins.X] tables and
 stop leaking HERMES_HOME into config.toml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Builds on @steezkelly's Bug A fix (#25857, top-level default_permissions
via _insert_managed_block_at_top_level) by addressing the other two
config-corruption bugs described in #26250:

Bug B (duplicate [plugins.X] tables)
  - Codex itself writes [plugins."<name>@<marketplace>"] tables to
    config.toml when the user runs `codex plugins enable` directly,
    before hermes-agent's managed block exists. On the next migrate run,
    _query_codex_plugins() re-discovers the same plugins via plugin/list
    and render_codex_toml_section() re-emits them inside the managed
    block. Codex's strict TOML parser then rejects the duplicate table
    header on startup.
  - Add _strip_unmanaged_plugin_tables() that drops [plugins.*] tables
    from the user-content portion of the file. Only run it when
    plugin/list succeeded — if the RPC failed we can't re-emit and
    must preserve the user's tables. plugin/list is the source of
    truth when it answers.

Bug C (HERMES_HOME pytest-tempdir leak into ~/.codex/config.toml)
  - _build_hermes_tools_mcp_entry() read HERMES_HOME directly from
    os.environ, so a sibling pytest's monkeypatch.setenv("HERMES_HOME",
    tmp_path) silently burned a transient pytest tempdir into the
    user's real ~/.codex/config.toml. After pytest reaped the tempdir,
    every codex-routed hermes-tools tool call failed silently.
  - Derive HERMES_HOME from get_hermes_home() (the canonical resolver
    that goes through the profile-aware path) and refuse to emit
    obvious test-tempdir paths via _looks_like_test_tempdir() as
    belt-and-suspenders for any other callsite that forgets to patch
    migrate().
  - test_enable_succeeds_when_codex_present in test_codex_runtime_switch.py
    invoked the real migrate() (no mock), writing to Path.home() / .codex
    using whatever HERMES_HOME the running pytest session had set. Add
    the same migrate patch the other apply() tests already use, so the
    suite stops touching the user's real ~/.codex/config.toml.

E2E verification (replicating the issue's repro):
  - Pre-state config.toml with user [mcp_servers.omx_team_run] +
    codex-installed [plugins."tasks@openai-curated"],
    HERMES_HOME="/private/var/folders/.../pytest-of-.../..."
  - On origin/main: tomllib refuses to load the result with
    "Cannot declare ('plugins', 'tasks@openai-curated') twice" AND
    the pytest-tempdir HERMES_HOME is burned in.
  - On this branch: file parses cleanly, default_permissions is
    top-level, exactly one [plugins."tasks@openai-curated"] table
    inside the managed block, no HERMES_HOME in the MCP env.

7 new regression tests covering all three bugs + the test-leak guard.
`bash scripts/run_tests.sh tests/hermes_cli/test_codex_runtime_*.py` —
95 passed, 0 failed.

Closes #26250
---
 hermes_cli/codex_runtime_plugin_migration.py  | 125 ++++++++++-
 .../test_codex_runtime_plugin_migration.py    | 207 ++++++++++++++++++
 tests/hermes_cli/test_codex_runtime_switch.py |   9 +-
 3 files changed, 337 insertions(+), 4 deletions(-)

diff --git a/hermes_cli/codex_runtime_plugin_migration.py b/hermes_cli/codex_runtime_plugin_migration.py
index 49b4905d5b2..4b30d3ebf26 100644
--- a/hermes_cli/codex_runtime_plugin_migration.py
+++ b/hermes_cli/codex_runtime_plugin_migration.py
@@ -335,6 +335,72 @@ def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> st
     return f"{managed_block}\n{suffix}"
 
 
+def _strip_unmanaged_plugin_tables(toml_text: str) -> str:
+    """Remove ``[plugins."<name>@<marketplace>"]`` tables that live OUTSIDE the
+    managed block.
+
+    Codex itself writes these tables when the user runs ``codex plugins enable``
+    directly (i.e. before Hermes' migrate has ever touched the file). When we
+    later run migrate, ``_query_codex_plugins()`` reports the same plugins via
+    the live ``plugin/list`` RPC and we re-emit them inside the managed block.
+    The result without this strip is duplicate ``[plugins."X@Y"]`` table
+    headers — codex's strict TOML parser then refuses to load the file.
+
+    We own the ``[plugins.*]`` namespace once migrate has run, so dropping any
+    pre-existing ``[plugins.*]`` tables is safe: ``plugin/list`` is the source
+    of truth for what's actually installed. The caller is expected to only
+    invoke this strip when ``plugin/list`` succeeded — otherwise we'd lose
+    plugins the user installed via ``codex`` without a way to re-emit them.
+
+    Behavior:
+      * Lines beginning with ``[plugins.`` start a swallow region that ends at
+        the next non-``[plugins.`` table header or end-of-file.
+      * Content inside the managed block is untouched (callers should run
+        ``_strip_existing_managed_block`` first so the managed block has
+        already been removed when this runs).
+    """
+    lines = toml_text.splitlines(keepends=True)
+    out: list[str] = []
+    in_plugin_table = False
+    for line in lines:
+        stripped = line.lstrip()
+        # Only treat a line as a table header when it has the shape
+        # ``[...]`` (optionally followed by a comment). Multi-line array
+        # continuations like ``["nested"],`` also start with ``[`` after
+        # lstrip but are not headers — without this guard they would
+        # falsely flip ``in_plugin_table`` to False mid-table and leak
+        # array fragments into the output.
+        if _looks_like_table_header(stripped):
+            in_plugin_table = stripped.startswith("[plugins.")
+            if in_plugin_table:
+                continue
+        if in_plugin_table:
+            # Swallow keys/comments/blanks until the next table header.
+            continue
+        out.append(line)
+    return "".join(out)
+
+
+def _looks_like_table_header(stripped_line: str) -> bool:
+    """Return True if ``stripped_line`` is a TOML table header.
+
+    A header has the shape ``[name]`` or ``[[name]]`` (array-of-tables),
+    optionally followed by a comment. The closing ``]`` (or ``]]``) must
+    appear on the same line, and no key-assignment ``=`` can precede it.
+    This distinguishes real headers from multi-line array continuation
+    lines that also start with ``[`` after ``lstrip()``.
+    """
+    if not stripped_line.startswith("["):
+        return False
+    # Drop trailing comment so e.g. ``[features]  # note`` still matches.
+    head = stripped_line.split("#", 1)[0].rstrip()
+    if not head.endswith("]"):
+        return False
+    # ``key = [x]`` would have an ``=`` before the bracket; a header doesn't.
+    bracket_idx = head.index("]")
+    return "=" not in head[: bracket_idx + 1]
+
+
 def _strip_existing_managed_block(toml_text: str) -> str:
     """Remove any prior managed section so re-runs idempotently replace it.
 
@@ -462,6 +528,32 @@ def _query_codex_plugins(
     return out, None
 
 
+def _looks_like_test_tempdir(path: str) -> bool:
+    """Heuristic: does ``path`` look like a pytest/transient tempdir?
+
+    pytest tempdirs live under ``pytest-of-<user>/pytest-<n>/`` (created via
+    ``tmp_path`` / ``tmp_path_factory``) and are reaped between sessions.
+    macOS routes ``/tmp`` through ``/private/var/folders/<…>/T`` which is
+    what pytest's tempdir factory uses by default. If a HERMES_HOME pointing
+    at one of those paths is burned into ``~/.codex/config.toml``, every
+    codex-routed hermes-tools call fails silently once the directory is GC'd.
+
+    We err on the side of refusing — losing a (very unlikely) real
+    ``~/.hermes`` symlink that happens to live under ``/private/var/folders``
+    is much less harmful than silently bricking codex's tool surface.
+    """
+    if not path:
+        return False
+    needles = (
+        "pytest-of-",
+        "/pytest-",
+        "/tmp/pytest",
+        "/private/var/folders/",  # macOS tempdir root
+    )
+    normalized = path.lower()
+    return any(needle in normalized for needle in needles)
+
+
 def _build_hermes_tools_mcp_entry() -> dict:
     """Build the codex stdio-transport entry that launches Hermes' own
     tool surface as an MCP server. Codex's subprocess will call back into
@@ -474,9 +566,22 @@ def _build_hermes_tools_mcp_entry() -> dict:
     import sys
 
     env: dict[str, str] = {}
-    # HERMES_HOME passes through if set so the MCP subprocess sees the
-    # same config / auth / sessions DB as the parent CLI.
-    hermes_home = os.environ.get("HERMES_HOME")
+    # HERMES_HOME passes through IF SET so the MCP subprocess sees the same
+    # config / auth / sessions DB as the parent CLI. Read from os.environ
+    # (not get_hermes_home()) on purpose: when the env var is unset we want
+    # codex's subprocess to inherit whatever HERMES_HOME its launcher sets
+    # at runtime (systemd unit, gateway, kanban dispatcher, custom shell),
+    # rather than burning the migrate-time resolved default into config.toml
+    # — that would override the launcher's HERMES_HOME and pin the subprocess
+    # to the wrong profile.
+    #
+    # The pytest-tempdir guard below catches the issue #26250 Bug C scenario:
+    # a sibling test's monkeypatch.setenv("HERMES_HOME", tmp_path) would
+    # otherwise leak a transient pytest tempdir into the user's real
+    # ~/.codex/config.toml and silently brick codex once the tempdir is GC'd.
+    hermes_home = os.environ.get("HERMES_HOME") or ""
+    if hermes_home and _looks_like_test_tempdir(hermes_home):
+        hermes_home = ""
     if hermes_home:
         env["HERMES_HOME"] = hermes_home
     # PYTHONPATH passes through so a worktree-launched hermes finds the
@@ -564,10 +669,16 @@ def migrate(
     # Discover installed Codex curated plugins. Best-effort — never blocks
     # the migration if codex is unreachable or the RPC fails.
     plugins: list[dict] = []
+    plugin_query_succeeded = False
     if discover_plugins and not dry_run:
         plugins, plugin_err = _query_codex_plugins(codex_home=codex_home)
         if plugin_err:
             report.plugin_query_error = plugin_err
+        else:
+            # plugin/list returned authoritatively (even if the list is empty).
+            # That means we own [plugins.*] for this re-render and can safely
+            # strip any pre-existing tables outside the managed block.
+            plugin_query_succeeded = True
         for p in plugins:
             report.migrated_plugins.append(f"{p['name']}@{p['marketplace']}")
 
@@ -602,6 +713,14 @@ def migrate(
             report.errors.append(f"could not read {target}: {exc}")
             return report
         without_managed = _strip_existing_managed_block(existing)
+        # Bug B: when plugin/list ran authoritatively, codex's own
+        # [plugins."<name>@<marketplace>"] tables outside our managed block
+        # would survive _strip_existing_managed_block and then collide with
+        # the entries we re-emit inside the managed block — producing
+        # duplicate-table-header parse errors on codex's next startup. Drop
+        # those pre-existing tables since plugin/list is the source of truth.
+        if plugin_query_succeeded:
+            without_managed = _strip_unmanaged_plugin_tables(without_managed)
         new_text = _insert_managed_block_at_top_level(without_managed, managed_block)
     else:
         new_text = managed_block
diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
index c283a668681..ebdc9f9ae6b 100644
--- a/tests/hermes_cli/test_codex_runtime_plugin_migration.py
+++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
@@ -8,9 +8,13 @@ import pytest
 
 from hermes_cli.codex_runtime_plugin_migration import (
     MIGRATION_MARKER,
+    MIGRATION_END_MARKER,
     MigrationReport,
+    _build_hermes_tools_mcp_entry,
     _format_toml_value,
+    _looks_like_test_tempdir,
     _strip_existing_managed_block,
+    _strip_unmanaged_plugin_tables,
     _translate_one_server,
     migrate,
     render_codex_toml_section,
@@ -656,3 +660,206 @@ class TestMigrate:
         assert "Migrated 2 MCP server(s)" in summary
         assert "- a" in summary
         assert "- b" in summary
+
+
+# ---- Bug B: duplicate [plugins.X] tables ----
+
+
+class TestStripUnmanagedPluginTables:
+    """Regression tests for issue #26250 Bug B.
+
+    When codex itself writes ``[plugins."<name>@<marketplace>"]`` tables
+    (via the user running ``codex plugins enable`` directly), re-running
+    ``hermes codex-runtime migrate`` would re-emit them inside the managed
+    block and the resulting duplicate-table-header would crash codex.
+    """
+
+    def test_strips_plugin_tables_outside_managed_block(self):
+        text = (
+            'model = "gpt-5.5"\n'
+            "\n"
+            "[mcp_servers.user-thing]\n"
+            'command = "x"\n'
+            "\n"
+            '[plugins."tasks@openai-curated"]\n'
+            "enabled = true\n"
+            "\n"
+            '[plugins."web-search@openai-curated"]\n'
+            "enabled = true\n"
+            "\n"
+            "[features]\n"
+            "terminal_resize_reflow = true\n"
+        )
+        stripped = _strip_unmanaged_plugin_tables(text)
+        assert "[plugins." not in stripped
+        # Non-plugin content preserved
+        assert "[mcp_servers.user-thing]" in stripped
+        assert "[features]" in stripped
+        assert "terminal_resize_reflow = true" in stripped
+
+    def test_preserves_content_when_no_plugin_tables(self):
+        text = (
+            'model = "gpt-5.5"\n'
+            "\n"
+            "[mcp_servers.x]\n"
+            'command = "y"\n'
+        )
+        assert _strip_unmanaged_plugin_tables(text) == text
+
+    def test_multi_line_array_in_plugin_table_does_not_leak(self):
+        """A multi-line TOML array inside a [plugins.X] table whose
+        continuation lines start with ``[`` (e.g. nested arrays) must NOT
+        prematurely exit the strip region — otherwise array fragments
+        leak into top-level output and produce invalid TOML on the next
+        codex startup. Regression guard for #26260 review.
+        """
+        text = (
+            '[plugins."tasks@openai-curated"]\n'
+            "allowed = [\n"
+            '  "a",\n'
+            '  ["nested"],\n'
+            "]\n"
+            "[features]\n"
+            "x = 1\n"
+        )
+        stripped = _strip_unmanaged_plugin_tables(text)
+        # Everything inside the plugin table — including the multi-line
+        # array's continuation lines starting with `[` — should be gone.
+        assert '["nested"]' not in stripped
+        assert "allowed" not in stripped
+        # Sibling user table survives intact.
+        assert "[features]" in stripped
+        assert "x = 1" in stripped
+        # Result is still valid TOML.
+        import tomllib
+        tomllib.loads(stripped)
+
+    def test_migrate_dedups_codex_owned_plugin_tables(self, tmp_path, monkeypatch):
+        """End-to-end: codex's pre-existing [plugins.X] tables get replaced by
+        the managed block's re-emission rather than duplicated."""
+        target = tmp_path / "config.toml"
+        target.write_text(
+            "[mcp_servers.user-server]\n"
+            'command = "x"\n'
+            "\n"
+            '[plugins."tasks@openai-curated"]\n'
+            "enabled = true\n"
+        )
+
+        # Simulate codex's plugin/list reporting the same plugin tasks@openai-curated.
+        def fake_query(codex_home=None, timeout=8.0):
+            return (
+                [{"name": "tasks", "marketplace": "openai-curated", "enabled": True}],
+                None,
+            )
+
+        monkeypatch.setattr(
+            "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins",
+            fake_query,
+        )
+        migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False)
+        new_text = target.read_text()
+        # Only ONE [plugins."tasks@openai-curated"] header should remain — inside
+        # the managed block — not the original outside-the-block copy.
+        assert new_text.count('[plugins."tasks@openai-curated"]') == 1
+        # And the surviving one is inside our managed section.
+        managed_start = new_text.index(MIGRATION_MARKER)
+        managed_end = new_text.index(MIGRATION_END_MARKER)
+        plugin_idx = new_text.index('[plugins."tasks@openai-curated"]')
+        assert managed_start < plugin_idx < managed_end
+        # File parses cleanly as TOML (the original duplicate-key error is gone).
+        import tomllib
+        tomllib.loads(new_text)
+
+    def test_migrate_preserves_plugin_tables_when_plugin_list_fails(self, tmp_path, monkeypatch):
+        """If plugin/list RPC fails, we can't re-emit plugins authoritatively,
+        so we must NOT strip the user's existing [plugins.X] tables — that
+        would silently lose them."""
+        target = tmp_path / "config.toml"
+        target.write_text(
+            '[plugins."tasks@openai-curated"]\n'
+            "enabled = true\n"
+        )
+
+        def fake_query(codex_home=None, timeout=8.0):
+            return ([], "plugin/list query failed: codex not installed")
+
+        monkeypatch.setattr(
+            "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins",
+            fake_query,
+        )
+        migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False)
+        new_text = target.read_text()
+        # User's plugin table preserved verbatim — we can't re-emit it.
+        assert '[plugins."tasks@openai-curated"]' in new_text
+
+
+# ---- Bug C: HERMES_HOME tempdir leak into ~/.codex/config.toml ----
+
+
+class TestHermesHomeLeakGuard:
+    """Regression tests for issue #26250 Bug C.
+
+    Previously ``_build_hermes_tools_mcp_entry()`` read ``HERMES_HOME``
+    directly from ``os.environ``, so a pytest ``monkeypatch.setenv`` would
+    leak a transient tempdir path into the user's real ``~/.codex/config.toml``
+    once codex spawned the hermes-tools MCP subprocess.
+    """
+
+    def test_tempdir_detector_recognizes_pytest_paths(self):
+        assert _looks_like_test_tempdir(
+            "/private/var/folders/abc/pytest-of-kshitij/pytest-137/popen-gw2/test_X/hermes_test"
+        )
+        assert _looks_like_test_tempdir(
+            "/tmp/pytest-of-user/pytest-12/test_X/hermes"
+        )
+        assert _looks_like_test_tempdir(
+            "/private/var/folders/zz/T/pytest-of-bob/pytest-1"
+        )
+
+    def test_tempdir_detector_accepts_real_hermes_home(self):
+        assert not _looks_like_test_tempdir("/Users/alice/.hermes")
+        assert not _looks_like_test_tempdir("/home/bob/.hermes")
+        assert not _looks_like_test_tempdir("/opt/hermes")
+        assert not _looks_like_test_tempdir("")
+
+    def test_pytest_tempdir_not_burned_into_mcp_env(self, monkeypatch):
+        """The headline regression: even when HERMES_HOME points at a pytest
+        tempdir, _build_hermes_tools_mcp_entry() must NOT propagate it."""
+        monkeypatch.setenv(
+            "HERMES_HOME",
+            "/private/var/folders/xx/pytest-of-user/pytest-99/test_x/hermes_test",
+        )
+        entry = _build_hermes_tools_mcp_entry()
+        env = entry.get("env", {})
+        assert "HERMES_HOME" not in env, (
+            f"pytest-tempdir HERMES_HOME leaked into codex MCP entry: "
+            f"{env.get('HERMES_HOME')!r}"
+        )
+
+    def test_real_hermes_home_propagates(self, monkeypatch, tmp_path):
+        """A legitimate HERMES_HOME (not a tempdir path) DOES propagate so the
+        MCP subprocess sees the same config as the parent CLI."""
+        # Use a path that looks real — under /Users or /home, not /var/folders.
+        # We can't easily create one in the test, so just use a stable path
+        # outside any tempdir-detector needle. The detector checks for tempdir
+        # markers, not for path existence.
+        real_path = "/Users/alice/.hermes"
+        monkeypatch.setenv("HERMES_HOME", real_path)
+        entry = _build_hermes_tools_mcp_entry()
+        env = entry.get("env", {})
+        assert env.get("HERMES_HOME") == real_path
+
+    def test_unset_hermes_home_omits_env_key(self, monkeypatch):
+        """When HERMES_HOME is unset in the environment, the MCP entry MUST
+        NOT bake in a resolved-default path. The codex subprocess should
+        inherit whatever HERMES_HOME its launcher (systemd, gateway, shell)
+        sets at runtime, rather than being pinned to migrate-time defaults.
+        Regression guard for issue #26250 follow-up review."""
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+        entry = _build_hermes_tools_mcp_entry()
+        env = entry.get("env", {})
+        assert "HERMES_HOME" not in env, (
+            f"HERMES_HOME should not be set when env var is unset, got: "
+            f"{env.get('HERMES_HOME')!r}"
+        )
diff --git a/tests/hermes_cli/test_codex_runtime_switch.py b/tests/hermes_cli/test_codex_runtime_switch.py
index 9a01543776e..7bf1a59e1e7 100644
--- a/tests/hermes_cli/test_codex_runtime_switch.py
+++ b/tests/hermes_cli/test_codex_runtime_switch.py
@@ -114,8 +114,15 @@ class TestApply:
         def persist(c):
             persisted.update(c)
 
+        # Patch migrate so this test doesn't reach into the user's real
+        # ~/.codex/config.toml. See issue #26250 Bug C — without this patch,
+        # crs.apply() invokes the real migrate() which writes to
+        # Path.home() / ".codex" using whatever HERMES_HOME the running pytest
+        # session has set, leaking pytest tempdir paths into the user's
+        # codex config.
         with patch.object(crs, "check_codex_binary_ok",
-                          return_value=(True, "0.130.0")):
+                          return_value=(True, "0.130.0")), \
+             patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
             r = crs.apply(cfg, "codex_app_server", persist_callback=persist)
         assert r.success
         assert r.new_value == "codex_app_server"

From f199cd9f84d8e59f0e50ce8d99aa9ac8adcc571a Mon Sep 17 00:00:00 2001
From: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 15 May 2026 05:03:43 -0700
Subject: [PATCH 039/218] chore(release): map brian@dralth.com to btorresgil
 for #22345 salvage (#26319)

PR #22345 by @btorresgil authors commits as 'Brian Conklin
<brian@dralth.com>' (git config carries a different name/email than the
GitHub account). GitHub's commit-author mapping correctly attributes these
commits to @btorresgil based on the public-key registration, but Hermes'
release attribution audit reads the raw commit email, not the GitHub
mapping. Without this AUTHOR_MAP entry, salvaging #22345 would fail
`scripts/contributor_audit.py` strict mode at release time.

Prerequisite for the langfuse trace fix salvage that cherry-picks
@btorresgil's commits onto current main.
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 8a6f30802be..f3df43c3fe1 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -265,6 +265,7 @@ AUTHOR_MAP = {
     "yuxiangl490@gmail.com": "y0shua1ee",
     "manmit0x@gmail.com": "0xDevNinja",
     "stevekelly622@gmail.com": "steezkelly",
+    "brian@dralth.com": "btorresgil",
     "momowind@gmail.com": "momowind",
     "clockwork-codex@users.noreply.github.com": "misery-hl",
     "207811921+misery-hl@users.noreply.github.com": "misery-hl",

From db84a78e618bf973ffc403ed2e1f8162f2591daa Mon Sep 17 00:00:00 2001
From: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 15 May 2026 05:04:02 -0700
Subject: [PATCH 040/218] =?UTF-8?q?fix(langfuse):=20complete=20observabili?=
 =?UTF-8?q?ty=20fix=20=E2=80=94=20trace=20I/O,=20tool=20outputs,=20placeho?=
 =?UTF-8?q?lder=20credentials=20(closes=20#22342,=20#22763)=20(#26320)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(langfuse): reject placeholder credentials with one-shot warning

When operators leave HERMES_LANGFUSE_PUBLIC_KEY / HERMES_LANGFUSE_SECRET_KEY
at a template value like 'placeholder', 'test-key', or 'your-langfuse-key',
the Langfuse SDK silently accepts the credentials at construction time and
drops every trace at flush time. No warning, no error — just an empty
Langfuse dashboard the operator only notices hours later.

Add prefix-based validation in _get_langfuse() against the documented
'pk-lf-' / 'sk-lf-' prefixes that Langfuse always issues server-side.
Anything else fires a single warning naming the offending env var(s)
with a log-safe value preview (full string for short placeholders so the
operator knows which template they left in place; truncated for long
values so a real secret pasted into the wrong field never hits the log),
then short-circuits via the existing _INIT_FAILED cache so the warning
fires once per process, not once per hook invocation.

The check sits after the 'Langfuse is None' SDK-installed guard so hosts
without the optional langfuse SDK don't see misleading 'set real keys'
hints when the actionable fix is 'pip install langfuse'. Missing
credentials remains the documented opt-out path and stays silent — no
log noise for unconfigured installs.

Fixes #22763
Fixes #23823

* fix(langfuse): use actual API request messages for generation input

on_pre_llm_request previously used the messages kwarg alone, which
could be None when Hermes passes the payload via request_messages,
conversation_history, or user_message instead. Add _coerce_request_messages
to pick the first available list across all variants, falling back to a
synthetic user message. Generations now show the real outbound payload
rather than an empty input.

* fix(langfuse): record tool call outputs in traces

Tool observations showed input (arguments) but output was always
undefined. Root cause: when tool_call_id is empty, pre_tool_call stored
observations under a unique time-based key that post_tool_call could
never reconstruct, so every tool span was closed without output by the
_finish_trace sweep.

Fix pre/post matching by routing empty-tool_call_id tools through a
per-name FIFO queue (pending_tools_by_name) instead of the time-based
key. Tools with a tool_call_id continue to use the id-keyed dict.

Also:
 - Preserve OpenAI-style nested function shape in serialized tool calls
   so Langfuse renders name/arguments correctly
 - Keep name + tool_call_id on role:tool messages for proper pairing
 - Backfill tool results onto the matching turn_tool_calls entry so the
   generation's tool-call record carries the result alongside arguments
 - Coerce request messages from whichever field the runtime provides
   (request_messages, messages, conversation_history, user_message)

* fix(langfuse): salvage-review polish — drop dead is_first_turn, shallow-copy request_messages, real threaded FIFO test

Self-review of the combined #22345 + #23831 salvage surfaced three issues
worth fixing in the same PR rather than as follow-ups:

1. Drop is_first_turn from the pre_api_request hook. The boolean expression
   `not bool(conversation_history)` was wrong: conversation_history is
   reassigned to None mid-run after compression (5 sites in run_agent.py),
   so the value flips False -> True mid-conversation on every post-compression
   API call. The langfuse plugin never consumed it, so the kwarg was both
   misleading AND dead.

2. Replace copy.deepcopy(request_messages) with shallow list() copy. The
   pre_api_request hook contract discards return values (invoke_hook never
   writes back to api_kwargs), and the langfuse plugin's _serialize_messages
   already builds its own snapshot dicts via _safe_value. A deepcopy on every
   API call would walk every tool result and base64 image — significant
   overhead for no real isolation benefit. Shallow copy of the outer list
   protects against later mutations of api_messages without paying for the
   inner-dict walk.

3. Rename test_empty_tool_call_id_concurrent_fifo_order ->
   test_empty_tool_call_id_observations_are_fifo_within_tool_name and add a
   real test_threaded_post_calls_preserve_fifo_under_lock that spawns 8
   threads behind a barrier to actually exercise _STATE_LOCK on the
   pending_tools_by_name queue. The original test was sequential and only
   validated Python list semantics; this one validates the lock discipline.

4. Fix stale 'Cleared by reset_cache_for_tests()' comment on _INIT_FAILED —
   that function does not exist. Tests reload the module via sys.modules.pop
   + importlib.import_module instead.

Tests: 37 langfuse plugin tests pass, 658 plugin tests overall pass.

---------

Co-authored-by: xxxigm <tuancanhnguyen706@gmail.com>
Co-authored-by: Brian Conklin <brian@dralth.com>
---
 plugins/observability/langfuse/__init__.py | 168 ++++++-
 run_agent.py                               |  16 +
 tests/plugins/test_langfuse_plugin.py      | 538 ++++++++++++++++++++-
 tests/run_agent/test_run_agent.py          |   5 +-
 4 files changed, 705 insertions(+), 22 deletions(-)

diff --git a/plugins/observability/langfuse/__init__.py b/plugins/observability/langfuse/__init__.py
index 9c9583261a6..8516030fb01 100644
--- a/plugins/observability/langfuse/__init__.py
+++ b/plugins/observability/langfuse/__init__.py
@@ -47,6 +47,7 @@ class TraceState:
     root_span: Any
     generations: Dict[str, Any] = field(default_factory=dict)
     tools: Dict[str, Any] = field(default_factory=dict)
+    pending_tools_by_name: Dict[str, list] = field(default_factory=dict)
     turn_tool_calls: list[dict[str, Any]] = field(default_factory=list)
     last_updated_at: float = field(default_factory=time.time)
 
@@ -58,6 +59,17 @@ _READ_FILE_LINE_RE = re.compile(r"^\s*(\d+)\|(.*)$")
 _READ_FILE_HEAD_LINES = 25
 _READ_FILE_TAIL_LINES = 15
 
+# Langfuse-issued keys always carry these prefixes (cloud or self-hosted —
+# the prefix is baked into the server-side issuance flow, not a UI hint).
+# Anything else (`placeholder`, `test-key`, `your-langfuse-key`, etc.) is a
+# leftover template value and would cause the SDK to silently accept the
+# credentials at construction time but drop every trace at flush time.
+# See #23823 — the silent-failure bug this guard fixes.
+_LANGFUSE_KEY_PREFIXES: Dict[str, str] = {
+    "HERMES_LANGFUSE_PUBLIC_KEY": "pk-lf-",
+    "HERMES_LANGFUSE_SECRET_KEY": "sk-lf-",
+}
+
 
 def _env(name: str, default: str = "") -> str:
     return os.environ.get(name, default).strip()
@@ -82,10 +94,49 @@ def _debug(message: str) -> None:
 
 # Sentinel: "_get_langfuse() has tried and failed". Lets us short-circuit
 # every subsequent hook call without re-checking env vars or re-attempting
-# SDK init. Cleared by reset_cache_for_tests().
+# SDK init. Tests clear this by reloading the module via
+# ``sys.modules.pop(...) + importlib.import_module(...)`` rather than via a
+# dedicated reset function. Runtime callers cannot reset the cache; if an
+# operator fixes a misconfigured credential they must restart the process.
 _INIT_FAILED = object()
 
 
+def _redact_key_preview(value: str) -> str:
+    """Return a brief, log-safe preview of a credential value.
+
+    Keeps enough characters to disambiguate common placeholders
+    (``placeholder``, ``test-key``, ``your-key``) without echoing a
+    real secret in full if an operator pasted one into the wrong env
+    var.  Used only for the once-per-process placeholder-detection
+    warning in :func:`_get_langfuse`.
+    """
+    if not value:
+        return "<empty>"
+    if len(value) <= 12:
+        return repr(value)
+    return repr(value[:6] + "...")
+
+
+def _validate_langfuse_key(env_name: str, value: str) -> Optional[str]:
+    """Return an error message if ``value`` is not a real Langfuse key.
+
+    Returns ``None`` when the value matches the documented Langfuse
+    prefix for ``env_name``, or when no prefix is registered for the
+    name (in which case we trust the operator).  When validation
+    fails the returned string is suitable for direct inclusion in a
+    single log line — it names the env var and shows a safe preview.
+    """
+    expected = _LANGFUSE_KEY_PREFIXES.get(env_name, "")
+    if not expected:
+        return None
+    if value.startswith(expected):
+        return None
+    return (
+        f"{env_name}={_redact_key_preview(value)} "
+        f"(expected {expected!r} prefix)"
+    )
+
+
 def _get_langfuse() -> Optional[Langfuse]:
     """Return a cached Langfuse client, or ``None`` if unavailable.
 
@@ -111,6 +162,33 @@ def _get_langfuse() -> Optional[Langfuse]:
         _LANGFUSE_CLIENT = _INIT_FAILED
         return None
 
+    # Reject placeholder credentials with a one-shot warning so the
+    # operator sees the misconfiguration instead of silently shipping a
+    # broken observability stack (#23823).  The SDK does not validate
+    # keys at construction time — it queues traces in memory and only
+    # discovers the auth failure when the background flush thread tries
+    # to post them, by which point the warning is buried under whatever
+    # else the process is logging.  Catch it here, surface it once, and
+    # short-circuit via the same _INIT_FAILED path as the empty case.
+    placeholder_issues = [
+        msg
+        for msg in (
+            _validate_langfuse_key("HERMES_LANGFUSE_PUBLIC_KEY", public_key),
+            _validate_langfuse_key("HERMES_LANGFUSE_SECRET_KEY", secret_key),
+        )
+        if msg
+    ]
+    if placeholder_issues:
+        logger.warning(
+            "Langfuse plugin: credentials look like placeholders, traces will "
+            "NOT be emitted (%s). Set real Langfuse keys (pk-lf-... / sk-lf-...) "
+            "or unset HERMES_LANGFUSE_PUBLIC_KEY / HERMES_LANGFUSE_SECRET_KEY to "
+            "silence this warning.",
+            "; ".join(placeholder_issues),
+        )
+        _LANGFUSE_CLIENT = _INIT_FAILED
+        return None
+
     base_url = _env("HERMES_LANGFUSE_BASE_URL") or _env("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com"
     environment = _env("HERMES_LANGFUSE_ENV") or _env("LANGFUSE_ENV")
     release = _env("HERMES_LANGFUSE_RELEASE") or _env("LANGFUSE_RELEASE")
@@ -328,6 +406,21 @@ def _extract_last_user_message(messages: Any) -> Any:
     return None
 
 
+def _coerce_request_messages(
+    *,
+    request_messages: Any = None,
+    messages: Any = None,
+    conversation_history: Any = None,
+    user_message: Any = None,
+) -> list[dict[str, Any]]:
+    for candidate in (request_messages, messages, conversation_history):
+        if isinstance(candidate, list):
+            return candidate
+    if user_message is None:
+        return []
+    return [{"role": "user", "content": user_message}]
+
+
 def _serialize_messages(messages: Any) -> list[dict[str, Any]]:
     if not isinstance(messages, list):
         return []
@@ -343,8 +436,11 @@ def _serialize_messages(messages: Any) -> list[dict[str, Any]]:
                 parse_json_strings=(role == "tool"),
             ),
         }
-        if role == "tool" and message.get("tool_call_id"):
-            item["tool_call_id"] = message.get("tool_call_id")
+        if role == "tool":
+            if message.get("tool_call_id"):
+                item["tool_call_id"] = message.get("tool_call_id")
+            if message.get("name"):
+                item["name"] = _safe_value(message.get("name"))
         if message.get("tool_calls"):
             item["tool_calls"] = _safe_value(message.get("tool_calls"), parse_json_strings=True)
         serialized.append(item)
@@ -359,15 +455,16 @@ def _serialize_tool_calls(tool_calls: Any) -> list[dict[str, Any]]:
         fn = getattr(tool_call, "function", None)
         name = getattr(fn, "name", None) if fn else None
         arguments = getattr(fn, "arguments", None) if fn else None
-        if isinstance(arguments, str):
-            try:
-                arguments = json.loads(arguments)
-            except Exception:
-                pass
+        safe_arguments = _safe_value(arguments, parse_json_strings=False)
         serialized.append({
             "id": getattr(tool_call, "id", None),
+            "type": getattr(tool_call, "type", None) or "function",
             "name": name,
-            "arguments": _safe_value(arguments, parse_json_strings=True),
+            "arguments": safe_arguments,
+            "function": {
+                "name": name,
+                "arguments": safe_arguments,
+            },
         })
     return serialized
 
@@ -564,6 +661,9 @@ def _finish_trace(task_key: str, *, output: Any = None) -> None:
             _end_observation(observation)
         for observation in state.tools.values():
             _end_observation(observation)
+        for queue in state.pending_tools_by_name.values():
+            for observation in queue:
+                _end_observation(observation)
         final_output = _merge_trace_output(output, state)
         if final_output is not None:
             state.root_span.set_trace_io(output=final_output)
@@ -636,6 +736,7 @@ def on_pre_llm_request(
     base_url: str = "",
     api_mode: str = "",
     api_call_count: int = 0,
+    request_messages: Any = None,
     messages: Any = None,
     turn_type: str = "user",
     message_count: int = 0,
@@ -643,12 +744,21 @@ def on_pre_llm_request(
     approx_input_tokens: int = 0,
     request_char_count: int = 0,
     max_tokens: Any = None,
+    conversation_history: Any = None,
+    user_message: Any = None,
     **_: Any,
 ) -> None:
     client = _get_langfuse()
     if client is None:
         return
 
+    input_messages = _coerce_request_messages(
+        request_messages=request_messages,
+        messages=messages,
+        conversation_history=conversation_history,
+        user_message=user_message,
+    )
+
     task_key = _trace_key(task_id, session_id)
     req_key = _request_key(api_call_count)
 
@@ -663,7 +773,7 @@ def on_pre_llm_request(
                 provider=provider,
                 model=model,
                 api_mode=api_mode,
-                messages=messages,
+                messages=input_messages,
                 client=client,
             )
             _TRACE_STATE[task_key] = state
@@ -676,7 +786,7 @@ def on_pre_llm_request(
             client=client,
             name=f"LLM call {api_call_count}",
             as_type="generation",
-            input_value=_serialize_messages(messages),
+            input_value=_serialize_messages(input_messages),
             metadata={
                 "provider": provider,
                 "platform": platform,
@@ -815,13 +925,12 @@ def on_pre_tool_call(*, tool_name: str = "", args: Any = None, task_id: str = ""
         return
 
     task_key = _trace_key(task_id, session_id)
-    tool_key = tool_call_id or f"{tool_name}:{time.time_ns()}"
 
     with _STATE_LOCK:
         state = _TRACE_STATE.get(task_key)
         if state is None:
             return
-        state.tools[tool_key] = _start_child_observation(
+        observation = _start_child_observation(
             state,
             client=client,
             name=f"Tool: {tool_name}",
@@ -829,22 +938,29 @@ def on_pre_tool_call(*, tool_name: str = "", args: Any = None, task_id: str = ""
             input_value=_safe_value(args),
             metadata={"tool_name": tool_name, "tool_call_id": tool_call_id},
         )
+        if tool_call_id:
+            state.tools[tool_call_id] = observation
+        else:
+            state.pending_tools_by_name.setdefault(tool_name, []).append(observation)
 
 
 def on_post_tool_call(*, tool_name: str = "", args: Any = None, result: Any = None,
                       task_id: str = "", session_id: str = "", tool_call_id: str = "", **_: Any) -> None:
     task_key = _trace_key(task_id, session_id)
-    tool_key = tool_call_id or ""
     observation = None
 
     with _STATE_LOCK:
         state = _TRACE_STATE.get(task_key)
         if state is None:
             return
-        if tool_key:
-            observation = state.tools.pop(tool_key, None)
-        elif state.tools:
-            _, observation = state.tools.popitem()
+        if tool_call_id:
+            observation = state.tools.pop(tool_call_id, None)
+        if observation is None:
+            queue = state.pending_tools_by_name.get(tool_name)
+            if queue:
+                observation = queue.pop(0)
+                if not queue:
+                    state.pending_tools_by_name.pop(tool_name, None)
 
     if observation is None:
         return
@@ -854,10 +970,24 @@ def on_post_tool_call(*, tool_name: str = "", args: Any = None, result: Any = No
     else:
         result_value = result
     result_value = _normalize_payload(result_value, tool_name=tool_name, args=args)
+    safe_result_value = _safe_value(result_value, parse_json_strings=True)
+
+    # Backfill so the generation's tool_call record carries the result alongside arguments.
+    if tool_call_id:
+        with _STATE_LOCK:
+            state = _TRACE_STATE.get(task_key)
+            if state is not None:
+                for tool_call in reversed(state.turn_tool_calls):
+                    if tool_call.get("id") == tool_call_id:
+                        tool_call["output"] = safe_result_value
+                        function_payload = tool_call.get("function")
+                        if isinstance(function_payload, dict):
+                            function_payload["output"] = safe_result_value
+                        break
 
     _end_observation(
         observation,
-        output=_safe_value(result_value, parse_json_strings=True),
+        output=safe_result_value,
         metadata={"tool_name": tool_name, "args": _safe_value(args, parse_json_strings=True)},
     )
 
diff --git a/run_agent.py b/run_agent.py
index 18ca03bd512..a4df8749777 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -12668,16 +12668,30 @@ class AIAgent:
 
                     try:
                         from hermes_cli.plugins import invoke_hook as _invoke_hook
+                        request_messages = api_kwargs.get("messages")
+                        if not isinstance(request_messages, list):
+                            request_messages = api_kwargs.get("input")
+                        if not isinstance(request_messages, list):
+                            request_messages = api_messages
+                        # Shallow-copy the outer list so plugins that retain the
+                        # reference for async snapshotting don't observe later
+                        # mutations of api_messages.  The inner dicts are not
+                        # mutated by the agent loop, so a shallow copy is
+                        # sufficient; a deepcopy would walk every tool result
+                        # and base64 image on every API call.
                         _invoke_hook(
                             "pre_api_request",
                             task_id=effective_task_id,
                             session_id=self.session_id or "",
+                            user_message=original_user_message,
+                            conversation_history=list(messages),
                             platform=self.platform or "",
                             model=self.model,
                             provider=self.provider,
                             base_url=self.base_url,
                             api_mode=self.api_mode,
                             api_call_count=api_call_count,
+                            request_messages=list(request_messages) if isinstance(request_messages, list) else [],
                             message_count=len(api_messages),
                             tool_count=len(self.tools or []),
                             approx_input_tokens=approx_tokens,
@@ -14582,7 +14596,9 @@ class AIAgent:
                         finish_reason=finish_reason,
                         message_count=len(api_messages),
                         response_model=getattr(response, "model", None),
+                        response=response,
                         usage=self._usage_summary_for_api_request_hook(response),
+                        assistant_message=assistant_message,
                         assistant_content_chars=len(_assistant_text),
                         assistant_tool_call_count=len(_assistant_tool_calls),
                     )
diff --git a/tests/plugins/test_langfuse_plugin.py b/tests/plugins/test_langfuse_plugin.py
index 6d9fcce38ee..313d2e94a72 100644
--- a/tests/plugins/test_langfuse_plugin.py
+++ b/tests/plugins/test_langfuse_plugin.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import importlib
+import logging
 import sys
 from pathlib import Path
 
@@ -164,7 +165,542 @@ class TestHooksInert:
 
         # Each hook should just return; no exceptions.
         mod.on_pre_llm_call(task_id="t", session_id="s", messages=[{"role": "user", "content": "hi"}])
-        mod.on_pre_llm_request(task_id="t", session_id="s", api_call_count=1, messages=[])
+        mod.on_pre_llm_request(task_id="t", session_id="s", api_call_count=1, request_messages=[])
         mod.on_post_llm_call(task_id="t", session_id="s", api_call_count=1)
         mod.on_pre_tool_call(tool_name="read_file", args={}, task_id="t", session_id="s")
         mod.on_post_tool_call(tool_name="read_file", args={}, result="ok", task_id="t", session_id="s")
+
+
+# ---------------------------------------------------------------------------
+# Placeholder-credential guard (#23823).
+#
+# Regression coverage for the silent-failure bug: when an operator leaves
+# HERMES_LANGFUSE_PUBLIC_KEY / SECRET_KEY at a template value like
+# "placeholder", "test-key", or "your-langfuse-key", the SDK accepts the
+# credentials at construction time (it does no server-side validation
+# eagerly) but drops every trace at flush time, with no signal in the
+# Hermes logs.  The fix in `_get_langfuse()` validates the documented
+# `pk-lf-` / `sk-lf-` prefix Langfuse always issues, surfaces a one-shot
+# warning naming the offending env var(s), and short-circuits via the
+# same `_INIT_FAILED` path used for missing credentials so subsequent
+# hook invocations don't re-log.
+# ---------------------------------------------------------------------------
+
+
+class _FakeLangfuse:
+    """Stand-in for the real :class:`langfuse.Langfuse` so tests don't
+    need the optional ``langfuse`` SDK installed.  The plugin's runtime
+    gate refuses to proceed past ``if Langfuse is None`` when the SDK
+    is missing, which would short-circuit before the placeholder check
+    can fire.  Patching ``plugin.Langfuse`` with this class lets the
+    placeholder validator exercise its full code path."""
+
+    instances: list["_FakeLangfuse"] = []
+
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+        _FakeLangfuse.instances.append(self)
+
+
+class TestPlaceholderKeyDetection:
+    LOGGER_NAME = "plugins.observability.langfuse"
+
+    def _fresh_plugin(self, monkeypatch=None):
+        mod_name = "plugins.observability.langfuse"
+        sys.modules.pop(mod_name, None)
+        mod = importlib.import_module(mod_name)
+        if monkeypatch is not None:
+            # Pretend the SDK is installed so `_get_langfuse()` actually
+            # reaches the placeholder check.  Real SDK calls are never
+            # made because the placeholder/missing-credentials paths
+            # return before constructing a client.
+            _FakeLangfuse.instances.clear()
+            monkeypatch.setattr(mod, "Langfuse", _FakeLangfuse, raising=False)
+        return mod
+
+    @staticmethod
+    def _clear_env(monkeypatch):
+        for k in (
+            "HERMES_LANGFUSE_PUBLIC_KEY", "HERMES_LANGFUSE_SECRET_KEY",
+            "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY",
+        ):
+            monkeypatch.delenv(k, raising=False)
+
+    # -- helper unit tests (no SDK stub needed: these don't go through
+    #    _get_langfuse, they exercise the pure-Python helpers directly) ------
+
+    def test_redact_key_preview_empty(self, monkeypatch):
+        self._clear_env(monkeypatch)
+        plugin = self._fresh_plugin()
+        assert plugin._redact_key_preview("") == "<empty>"
+
+    def test_redact_key_preview_short_value_echoed(self, monkeypatch):
+        """Short placeholder strings are echoed in full so the operator
+        can see exactly which template they forgot to replace."""
+        self._clear_env(monkeypatch)
+        plugin = self._fresh_plugin()
+        assert plugin._redact_key_preview("placeholder") == "'placeholder'"
+        assert plugin._redact_key_preview("test-key") == "'test-key'"
+
+    def test_redact_key_preview_long_value_truncated(self, monkeypatch):
+        """If an operator pasted a real secret into the wrong env var the
+        preview must NOT echo it in full — only the leading 6 chars."""
+        self._clear_env(monkeypatch)
+        plugin = self._fresh_plugin()
+        result = plugin._redact_key_preview("sk-lf-abcdefghijklmnop")
+        assert "abcdefghij" not in result
+        assert result.startswith("'sk-lf-")
+        assert result.endswith("...'")
+
+    def test_validate_langfuse_key_accepts_documented_prefix(self, monkeypatch):
+        self._clear_env(monkeypatch)
+        plugin = self._fresh_plugin()
+        assert plugin._validate_langfuse_key(
+            "HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz"
+        ) is None
+        assert plugin._validate_langfuse_key(
+            "HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz"
+        ) is None
+
+    def test_validate_langfuse_key_rejects_wrong_prefix(self, monkeypatch):
+        self._clear_env(monkeypatch)
+        plugin = self._fresh_plugin()
+        msg = plugin._validate_langfuse_key(
+            "HERMES_LANGFUSE_PUBLIC_KEY", "placeholder"
+        )
+        assert msg is not None
+        assert "HERMES_LANGFUSE_PUBLIC_KEY" in msg
+        assert "pk-lf-" in msg
+
+    def test_validate_langfuse_key_unknown_name_passes(self, monkeypatch):
+        """Defensive: an env var with no registered prefix is trusted."""
+        self._clear_env(monkeypatch)
+        plugin = self._fresh_plugin()
+        assert plugin._validate_langfuse_key("HERMES_LANGFUSE_BASE_URL", "anything") is None
+
+    # -- end-to-end _get_langfuse() behaviour --------------------------------
+    # These tests pass `monkeypatch` to _fresh_plugin() so the helper can
+    # stub out `Langfuse` (the optional SDK).  Without that, every call
+    # short-circuits at `if Langfuse is None` before reaching the
+    # placeholder validator — masking the very behaviour we're testing.
+
+    def test_placeholder_public_key_warns_and_skips(self, monkeypatch, caplog):
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder")
+        monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz")
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            assert plugin._get_langfuse() is None
+        text = caplog.text
+        assert "HERMES_LANGFUSE_PUBLIC_KEY" in text
+        assert "'placeholder'" in text
+        assert "pk-lf-" in text
+        # The valid secret value must NOT appear (the var NAME does, in
+        # the "or unset ..." hint, but the value preview shouldn't).
+        assert "'sk-lf-" not in text
+        # Never constructed the SDK client — short-circuited before that.
+        assert _FakeLangfuse.instances == []
+
+    def test_placeholder_secret_key_warns_and_skips(self, monkeypatch, caplog):
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz")
+        monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "test-key")
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            assert plugin._get_langfuse() is None
+        text = caplog.text
+        assert "HERMES_LANGFUSE_SECRET_KEY" in text
+        assert "'test-key'" in text
+        assert "sk-lf-" in text
+        # The valid public value must NOT appear.
+        assert "'pk-lf-" not in text
+        assert _FakeLangfuse.instances == []
+
+    def test_both_placeholders_one_warning_with_both_keys(self, monkeypatch, caplog):
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder")
+        monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder")
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            assert plugin._get_langfuse() is None
+        warnings = [r for r in caplog.records if r.levelname == "WARNING"
+                    and r.name == self.LOGGER_NAME]
+        assert len(warnings) == 1, (
+            f"Expected a single combined warning; got {len(warnings)}:\n"
+            + "\n".join(r.getMessage() for r in warnings)
+        )
+        text = warnings[0].getMessage()
+        assert "HERMES_LANGFUSE_PUBLIC_KEY" in text
+        assert "HERMES_LANGFUSE_SECRET_KEY" in text
+
+    def test_repeated_calls_do_not_re_warn(self, monkeypatch, caplog):
+        """The cached ``_INIT_FAILED`` sentinel must short-circuit
+        subsequent calls so each hook invocation isn't a fresh log
+        line — otherwise a busy gateway will spam the operator's
+        terminal."""
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder")
+        monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder")
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            for _ in range(15):
+                assert plugin._get_langfuse() is None
+        warnings = [r for r in caplog.records if r.levelname == "WARNING"
+                    and r.name == self.LOGGER_NAME]
+        assert len(warnings) == 1, (
+            f"Warning fired {len(warnings)} times across 15 calls; "
+            "expected 1 (cached via _INIT_FAILED)"
+        )
+
+    @pytest.mark.parametrize("placeholder", [
+        "placeholder",
+        "test-key",
+        "your-langfuse-key",
+        "change-me",
+        "xxx",
+        "dummy-key-here",
+        "<your-key>",
+        "REPLACE_ME",
+    ])
+    def test_common_placeholders_detected(self, monkeypatch, caplog, placeholder):
+        """A grab-bag of values that real-world ``.env.example`` templates
+        use as stand-ins.  Any of them in either key must trip the guard."""
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", placeholder)
+        monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz")
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            assert plugin._get_langfuse() is None
+        assert "HERMES_LANGFUSE_PUBLIC_KEY" in caplog.text
+
+    def test_legacy_LANGFUSE_PUBLIC_KEY_also_validated(self, monkeypatch, caplog):
+        """The plugin reads both the canonical HERMES_-prefixed env var and
+        the legacy bare ``LANGFUSE_PUBLIC_KEY``.  The validator must run on
+        whichever value ``_get_langfuse()`` actually consumed."""
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "placeholder")
+        monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz")
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            assert plugin._get_langfuse() is None
+        # Warning names the canonical user-facing env var (the bare
+        # LANGFUSE_PUBLIC_KEY is a backwards-compat alias for the
+        # HERMES_-prefixed one — operators set the HERMES_-prefixed one).
+        assert "HERMES_LANGFUSE_PUBLIC_KEY" in caplog.text
+        assert "'placeholder'" in caplog.text
+
+    def test_missing_credentials_still_skip_silently(self, monkeypatch, caplog):
+        """Missing-creds is the documented opt-out path (operator hasn't
+        configured the plugin yet) — it must remain SILENT.  Regression
+        guard against the placeholder validator accidentally running on
+        empty values and re-introducing log noise for unconfigured
+        installs."""
+        self._clear_env(monkeypatch)
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            assert plugin._get_langfuse() is None
+        warnings = [r for r in caplog.records if r.levelname == "WARNING"
+                    and r.name == self.LOGGER_NAME]
+        assert warnings == []
+
+    def test_sdk_not_installed_still_skips_silently(self, monkeypatch, caplog):
+        """If the langfuse SDK isn't installed at all, the placeholder
+        check should never run — there's nothing the operator can do
+        about a credential mismatch when the package is missing, and
+        re-warning here would dilute the actually-actionable SDK-missing
+        signal upstream.  The ``Langfuse is None`` guard at the top of
+        ``_get_langfuse`` already handles this; this test pins that
+        behaviour."""
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder")
+        monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder")
+        # NO monkeypatch on Langfuse here — falls back to whatever the
+        # plugin imported at module load (None if SDK absent).
+        plugin = self._fresh_plugin()
+        monkeypatch.setattr(plugin, "Langfuse", None, raising=False)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            assert plugin._get_langfuse() is None
+        warnings = [r for r in caplog.records if r.levelname == "WARNING"
+                    and r.name == self.LOGGER_NAME]
+        assert warnings == []
+
+    def test_valid_prefixes_do_not_trigger_placeholder_warning(self, monkeypatch, caplog):
+        """Real Langfuse keys (``pk-lf-…`` / ``sk-lf-…``) must pass the
+        guard and proceed to SDK init.  We stub the SDK constructor with
+        a recording fake so the assertion can confirm BOTH that the
+        placeholder warning didn't fire AND that the client was actually
+        constructed — the latter is the success signal the bug report
+        wanted."""
+        self._clear_env(monkeypatch)
+        monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz")
+        monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz")
+        plugin = self._fresh_plugin(monkeypatch)
+        with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME):
+            client = plugin._get_langfuse()
+        assert isinstance(client, _FakeLangfuse)
+        assert client.kwargs["public_key"] == "pk-lf-real-public-xyz"
+        assert client.kwargs["secret_key"] == "sk-lf-real-secret-xyz"
+        assert "placeholders" not in caplog.text.lower(), (
+            f"Valid Langfuse keys tripped the placeholder guard: {caplog.text!r}"
+        )
+
+
+class TestRequestMessageCoercion:
+    def test_prefers_request_messages_then_messages_then_history_then_user_message(self):
+        sys.modules.pop("plugins.observability.langfuse", None)
+        mod = importlib.import_module("plugins.observability.langfuse")
+
+        assert mod._coerce_request_messages(
+            request_messages=[{"role": "system", "content": "s"}],
+            messages=[{"role": "user", "content": "m"}],
+            conversation_history=[{"role": "user", "content": "h"}],
+            user_message="u",
+        ) == [{"role": "system", "content": "s"}]
+        assert mod._coerce_request_messages(
+            messages=[{"role": "user", "content": "m"}],
+            conversation_history=[{"role": "user", "content": "h"}],
+            user_message="u",
+        ) == [{"role": "user", "content": "m"}]
+        assert mod._coerce_request_messages(
+            conversation_history=[{"role": "user", "content": "h"}],
+            user_message="u",
+        ) == [{"role": "user", "content": "h"}]
+        assert mod._coerce_request_messages(user_message="u") == [{"role": "user", "content": "u"}]
+
+
+class TestToolCallOutputBackfill:
+    def test_post_tool_call_backfills_matching_turn_tool_call_output(self, monkeypatch):
+        sys.modules.pop("plugins.observability.langfuse", None)
+        mod = importlib.import_module("plugins.observability.langfuse")
+
+        observation = object()
+        state = mod.TraceState(trace_id="trace-1", root_ctx=None, root_span=None)
+        state.tools["call-1"] = observation
+        state.turn_tool_calls.append({
+            "id": "call-1",
+            "type": "function",
+            "name": "web_extract",
+            "arguments": '{"urls": ["https://example.com"]}',
+            "function": {
+                "name": "web_extract",
+                "arguments": '{"urls": ["https://example.com"]}',
+            },
+        })
+
+        task_key = mod._trace_key("task-1", "session-1")
+        monkeypatch.setitem(mod._TRACE_STATE, task_key, state)
+
+        ended = {}
+
+        def fake_end_observation(obs, *, output=None, metadata=None, usage_details=None, cost_details=None):
+            ended["observation"] = obs
+            ended["output"] = output
+            ended["metadata"] = metadata
+
+        monkeypatch.setattr(mod, "_end_observation", fake_end_observation)
+
+        mod.on_post_tool_call(
+            tool_name="web_extract",
+            args={"urls": ["https://example.com"]},
+            result='{"results": [{"url": "https://example.com", "content": "Example Domain"}]}',
+            task_id="task-1",
+            session_id="session-1",
+            tool_call_id="call-1",
+        )
+
+        assert ended["observation"] is observation
+        assert state.turn_tool_calls[0]["output"] == ended["output"]
+        assert state.turn_tool_calls[0]["function"]["output"] == ended["output"]
+        assert state.turn_tool_calls[0]["output"] == {
+            "results": [{"url": "https://example.com", "content": "Example Domain"}]
+        }
+
+    def test_serialize_messages_keeps_tool_name_and_call_id(self):
+        sys.modules.pop("plugins.observability.langfuse", None)
+        mod = importlib.import_module("plugins.observability.langfuse")
+
+        messages = [{
+            "role": "tool",
+            "name": "web_extract",
+            "tool_call_id": "call-1",
+            "content": '{"ok": true}',
+        }]
+
+        assert mod._serialize_messages(messages) == [{
+            "role": "tool",
+            "name": "web_extract",
+            "tool_call_id": "call-1",
+            "content": {"ok": True},
+        }]
+
+    def test_serialize_tool_calls_emits_openai_style_function_shape(self):
+        sys.modules.pop("plugins.observability.langfuse", None)
+        mod = importlib.import_module("plugins.observability.langfuse")
+
+        class _Fn:
+            name = "web_extract"
+            arguments = '{"urls": ["https://example.com"]}'
+
+        class _ToolCall:
+            id = "call-1"
+            type = "function"
+            function = _Fn()
+
+        assert mod._serialize_tool_calls([_ToolCall()]) == [{
+            "id": "call-1",
+            "type": "function",
+            "name": "web_extract",
+            "arguments": '{"urls": ["https://example.com"]}',
+            "function": {
+                "name": "web_extract",
+                "arguments": '{"urls": ["https://example.com"]}',
+            },
+        }]
+
+
+class TestToolObservationKeying:
+    """Tests for pre/post tool_call observation matching when tool_call_id is absent."""
+
+    def _make_mod(self):
+        sys.modules.pop("plugins.observability.langfuse", None)
+        return importlib.import_module("plugins.observability.langfuse")
+
+    def test_empty_tool_call_id_single_tool_sets_output(self, monkeypatch):
+        mod = self._make_mod()
+        obs = object()
+        state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None)
+        state.pending_tools_by_name.setdefault("my_tool", []).append(obs)
+
+        task_key = mod._trace_key("task-1", "sess-1")
+        monkeypatch.setitem(mod._TRACE_STATE, task_key, state)
+
+        ended = {}
+
+        def fake_end(o, *, output=None, metadata=None, **kw):
+            ended["obs"] = o
+            ended["output"] = output
+
+        monkeypatch.setattr(mod, "_end_observation", fake_end)
+
+        mod.on_post_tool_call(
+            tool_name="my_tool",
+            args={},
+            result='{"ok": true}',
+            task_id="task-1",
+            session_id="sess-1",
+            tool_call_id="",
+        )
+
+        assert ended["obs"] is obs
+        assert ended["output"] == {"ok": True}
+        assert state.pending_tools_by_name.get("my_tool") is None
+
+    def test_empty_tool_call_id_observations_are_fifo_within_tool_name(self, monkeypatch):
+        """Two queued observations are consumed in FIFO order so the first
+        post hook gets the first observation's output, not the second.
+
+        Sequential-on-one-thread coverage; the real concurrent case is
+        guarded by ``_STATE_LOCK`` around every read-modify-write on
+        ``pending_tools_by_name`` and is exercised in
+        ``test_threaded_post_calls_preserve_fifo_under_lock`` below.
+        """
+        mod = self._make_mod()
+        obs_a, obs_b = object(), object()
+        state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None)
+        state.pending_tools_by_name["web_extract"] = [obs_a, obs_b]
+
+        task_key = mod._trace_key("task-1", "sess-1")
+        monkeypatch.setitem(mod._TRACE_STATE, task_key, state)
+
+        calls = []
+
+        def fake_end(o, *, output=None, metadata=None, **kw):
+            calls.append((o, output))
+
+        monkeypatch.setattr(mod, "_end_observation", fake_end)
+
+        mod.on_post_tool_call(
+            tool_name="web_extract", args={}, result='{"val": "a"}',
+            task_id="task-1", session_id="sess-1", tool_call_id="",
+        )
+        mod.on_post_tool_call(
+            tool_name="web_extract", args={}, result='{"val": "b"}',
+            task_id="task-1", session_id="sess-1", tool_call_id="",
+        )
+
+        assert calls[0] == (obs_a, {"val": "a"})
+        assert calls[1] == (obs_b, {"val": "b"})
+        assert state.pending_tools_by_name.get("web_extract") is None
+
+    def test_threaded_post_calls_preserve_fifo_under_lock(self, monkeypatch):
+        """The actual concurrency contract: when 8 threads race to drain
+        the pending queue, no observation is consumed twice and none is
+        lost.  Validates ``_STATE_LOCK`` discipline, not Python list
+        semantics."""
+        import threading
+
+        mod = self._make_mod()
+        n = 8
+        observations = [object() for _ in range(n)]
+        state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None)
+        state.pending_tools_by_name["web_extract"] = list(observations)
+
+        task_key = mod._trace_key("task-thr", "sess-thr")
+        monkeypatch.setitem(mod._TRACE_STATE, task_key, state)
+
+        recorded: list = []
+        lock = threading.Lock()
+
+        def fake_end(o, *, output=None, metadata=None, **kw):
+            with lock:
+                recorded.append(o)
+
+        monkeypatch.setattr(mod, "_end_observation", fake_end)
+
+        barrier = threading.Barrier(n)
+
+        def worker():
+            barrier.wait()
+            mod.on_post_tool_call(
+                tool_name="web_extract", args={}, result='{"ok": true}',
+                task_id="task-thr", session_id="sess-thr", tool_call_id="",
+            )
+
+        threads = [threading.Thread(target=worker) for _ in range(n)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        # Every observation was consumed exactly once; queue is empty.
+        assert len(recorded) == n
+        assert set(map(id, recorded)) == set(map(id, observations))
+        assert state.pending_tools_by_name.get("web_extract") is None
+
+    def test_explicit_tool_call_id_uses_tools_dict(self, monkeypatch):
+        """When tool_call_id is present, pending_tools_by_name is not touched."""
+        mod = self._make_mod()
+        obs = object()
+        state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None)
+        state.tools["call-99"] = obs
+
+        task_key = mod._trace_key("task-1", "sess-1")
+        monkeypatch.setitem(mod._TRACE_STATE, task_key, state)
+
+        ended = {}
+
+        def fake_end(o, *, output=None, metadata=None, **kw):
+            ended["obs"] = o
+            ended["output"] = output
+
+        monkeypatch.setattr(mod, "_end_observation", fake_end)
+
+        mod.on_post_tool_call(
+            tool_name="my_tool", args={}, result='{"status": "done"}',
+            task_id="task-1", session_id="sess-1", tool_call_id="call-99",
+        )
+
+        assert ended["obs"] is obs
+        assert ended["output"] == {"status": "done"}
+        assert not state.tools
+
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index dadb7b31cce..c493f91509a 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -2524,8 +2524,9 @@ class TestRunConversation:
         assert [call["api_call_count"] for call in pre_request_calls] == [1, 2]
         assert [call["api_call_count"] for call in post_request_calls] == [1, 2]
         assert all(call["session_id"] == agent.session_id for call in pre_request_calls)
-        assert all("message_count" in c and "messages" not in c for c in pre_request_calls)
-        assert all("usage" in c and "response" not in c for c in post_request_calls)
+        assert all("message_count" in c and isinstance(c.get("request_messages"), list) for c in pre_request_calls)
+        assert any(msg.get("role") == "user" and msg.get("content") == "search something" for msg in pre_request_calls[0]["request_messages"])
+        assert all("usage" in c and "response" in c and "assistant_message" in c for c in post_request_calls)
 
     def test_content_with_tool_calls_stays_silent_for_non_cli_quiet_mode(self, agent):
         self._setup_agent(agent)

From d5416284f11ccbc735c8357f0ab35ce5f683ccc3 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Fri, 15 May 2026 19:31:00 +0530
Subject: [PATCH 041/218] fix(tui): autonomous background process completion
 notifications (#26071) (#26327)

* feat(process-registry): add format_process_notification shared helper

* feat(process-registry): add drain_notifications method

* refactor(cli): use shared drain_notifications and format_process_notification

* feat(tui): add background notification poller for completion_queue

* feat(tui): wire notification poller into session init/finalize

* refactor(tui): add post-turn drain using shared helper as safety net
---
 cli.py                               |  59 +---------
 tests/test_tui_gateway_server.py     | 155 +++++++++++++++++++++++++++
 tests/tools/test_process_registry.py | 135 +++++++++++++++++++++++
 tools/process_registry.py            |  58 ++++++++++
 tui_gateway/server.py                | 134 +++++++++++++++++++++++
 5 files changed, 486 insertions(+), 55 deletions(-)

diff --git a/cli.py b/cli.py
index 27286a3c988..50e7a8c8ce9 100644
--- a/cli.py
+++ b/cli.py
@@ -1965,43 +1965,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
     return resolved
 
 
-def _format_process_notification(evt: dict) -> "str | None":
-    """Format a process notification event into a [IMPORTANT: ...] message.
 
-    Handles both completion events (notify_on_complete) and watch pattern
-    match events from the unified completion_queue.
-    """
-    evt_type = evt.get("type", "completion")
-    _sid = evt.get("session_id", "unknown")
-    _cmd = evt.get("command", "unknown")
-
-    if evt_type == "watch_disabled":
-        return f"[IMPORTANT: {evt.get('message', '')}]"
-
-    if evt_type == "watch_match":
-        _pat = evt.get("pattern", "?")
-        _out = evt.get("output", "")
-        _sup = evt.get("suppressed", 0)
-        text = (
-            f"[IMPORTANT: Background process {_sid} matched "
-            f"watch pattern \"{_pat}\".\n"
-            f"Command: {_cmd}\n"
-            f"Matched output:\n{_out}"
-        )
-        if _sup:
-            text += f"\n({_sup} earlier matches were suppressed by rate limit)"
-        text += "]"
-        return text
-
-    # Default: completion event
-    _exit = evt.get("exit_code", "?")
-    _out = evt.get("output", "")
-    return (
-        f"[IMPORTANT: Background process {_sid} completed "
-        f"(exit code {_exit}).\n"
-        f"Command: {_cmd}\n"
-        f"Output:\n{_out}]"
-    )
 
 
 def _detect_file_drop(user_input: str) -> "dict | None":
@@ -13518,16 +13482,8 @@ class HermesCLI:
                             # and watch pattern matches) while agent is idle.
                             try:
                                 from tools.process_registry import process_registry
-                                if not process_registry.completion_queue.empty():
-                                    evt = process_registry.completion_queue.get_nowait()
-                                    # Skip if the agent already consumed this via wait/poll/log
-                                    _evt_sid = evt.get("session_id", "")
-                                    if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
-                                        pass  # already delivered via tool result
-                                    else:
-                                        _synth = _format_process_notification(evt)
-                                        if _synth:
-                                            self._pending_input.put(_synth)
+                                for _evt, _synth in process_registry.drain_notifications():
+                                    self._pending_input.put(_synth)
                             except Exception:
                                 pass
                         continue
@@ -13635,15 +13591,8 @@ class HermesCLI:
                         # that arrived while the agent was running.
                         try:
                             from tools.process_registry import process_registry
-                            while not process_registry.completion_queue.empty():
-                                evt = process_registry.completion_queue.get_nowait()
-                                # Skip if the agent already consumed this via wait/poll/log
-                                _evt_sid = evt.get("session_id", "")
-                                if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
-                                    continue  # already delivered via tool result
-                                _synth = _format_process_notification(evt)
-                                if _synth:
-                                    self._pending_input.put(_synth)
+                            for _evt, _synth in process_registry.drain_notifications():
+                                self._pending_input.put(_synth)
                         except Exception:
                             pass  # Non-fatal — don't break the main loop
 
diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 64a154bb9a7..0d5bad8e875 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -4649,3 +4649,158 @@ def test_config_show_displays_nested_max_turns(monkeypatch):
     )
 
     assert ["Max Turns", "120"] in agent_rows
+
+
+def test_notification_poller_delivers_completion(monkeypatch):
+    """Poller picks up completion events and triggers agent turns."""
+    from tools.process_registry import process_registry
+
+    turns = []
+    emitted = []
+
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            turns.append(prompt)
+            return {
+                "final_response": "ok",
+                "messages": [{"role": "assistant", "content": "ok"}],
+            }
+
+    class _ImmediateThread:
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+        def start(self):
+            self._target()
+
+    sess = _session(agent=_Agent())
+    server._sessions["sid_poll"] = sess
+    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+    monkeypatch.setattr(server, "_emit", lambda *a, **kw: emitted.append(a))
+    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
+    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
+
+    # Clear queue
+    while not process_registry.completion_queue.empty():
+        process_registry.completion_queue.get_nowait()
+    process_registry._completion_consumed.discard("proc_poller_test")
+
+    stop = threading.Event()
+
+    # Put event on queue, then immediately signal stop so the poller
+    # runs exactly one iteration.
+    process_registry.completion_queue.put({
+        "type": "completion",
+        "session_id": "proc_poller_test",
+        "command": "echo hello",
+        "exit_code": 0,
+        "output": "hello",
+    })
+    stop.set()
+
+    try:
+        server._notification_poller_loop(stop, "sid_poll", sess)
+
+        # Should have emitted a status.update with kind=process
+        status_calls = [a for a in emitted if a[0] == "status.update"]
+        assert len(status_calls) >= 1
+        assert status_calls[0][2]["kind"] == "process"
+
+        # Should have triggered an agent turn
+        assert len(turns) == 1
+        assert "[IMPORTANT: Background process proc_poller_test completed" in turns[0]
+    finally:
+        server._sessions.pop("sid_poll", None)
+        while not process_registry.completion_queue.empty():
+            process_registry.completion_queue.get_nowait()
+
+
+def test_notification_poller_skips_consumed(monkeypatch):
+    """Already-consumed completions are not dispatched by the poller."""
+    from tools.process_registry import process_registry
+
+    turns = []
+
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            turns.append(prompt)
+            return {"final_response": "ok", "messages": []}
+
+    class _ImmediateThread:
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+        def start(self):
+            self._target()
+
+    sess = _session(agent=_Agent())
+    server._sessions["sid_skip"] = sess
+    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+    monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
+    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
+    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
+
+    while not process_registry.completion_queue.empty():
+        process_registry.completion_queue.get_nowait()
+
+    process_registry._completion_consumed.add("proc_already_done")
+    process_registry.completion_queue.put({
+        "type": "completion",
+        "session_id": "proc_already_done",
+        "command": "echo x",
+        "exit_code": 0,
+        "output": "x",
+    })
+
+    stop = threading.Event()
+    stop.set()
+
+    try:
+        server._notification_poller_loop(stop, "sid_skip", sess)
+        assert len(turns) == 0
+    finally:
+        server._sessions.pop("sid_skip", None)
+        process_registry._completion_consumed.discard("proc_already_done")
+        while not process_registry.completion_queue.empty():
+            process_registry.completion_queue.get_nowait()
+
+
+def test_notification_poller_requeues_when_busy(monkeypatch):
+    """When the agent is busy, the poller requeues the event."""
+    from tools.process_registry import process_registry
+
+    emitted = []
+
+    sess = _session(running=True)  # agent is busy
+    server._sessions["sid_busy"] = sess
+    monkeypatch.setattr(server, "_emit", lambda *a, **kw: emitted.append(a))
+
+    while not process_registry.completion_queue.empty():
+        process_registry.completion_queue.get_nowait()
+    process_registry._completion_consumed.discard("proc_busy_test")
+
+    evt = {
+        "type": "completion",
+        "session_id": "proc_busy_test",
+        "command": "make build",
+        "exit_code": 0,
+        "output": "ok",
+    }
+    process_registry.completion_queue.put(evt)
+
+    stop = threading.Event()
+    stop.set()
+
+    try:
+        server._notification_poller_loop(stop, "sid_busy", sess)
+
+        # Status update was emitted (user sees it)
+        status_calls = [a for a in emitted if a[0] == "status.update"]
+        assert len(status_calls) == 1
+
+        # Event was requeued (agent was busy, no turn triggered)
+        assert not process_registry.completion_queue.empty()
+        requeued = process_registry.completion_queue.get_nowait()
+        assert requeued["session_id"] == "proc_busy_test"
+    finally:
+        server._sessions.pop("sid_busy", None)
+        while not process_registry.completion_queue.empty():
+            process_registry.completion_queue.get_nowait()
diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py
index f438b637e28..46c29bb9d09 100644
--- a/tests/tools/test_process_registry.py
+++ b/tests/tools/test_process_registry.py
@@ -865,3 +865,138 @@ class TestProcessToolHandler:
         from tools.process_registry import _handle_process
         result = json.loads(_handle_process({"action": "unknown_action"}))
         assert "error" in result
+
+
+# =========================================================================
+# format_process_notification + drain_notifications (shared helpers)
+# =========================================================================
+
+from tools.process_registry import format_process_notification
+
+
+def test_format_completion_event():
+    evt = {
+        "type": "completion",
+        "session_id": "proc_abc",
+        "command": "sleep 5",
+        "exit_code": 0,
+        "output": "done",
+    }
+    result = format_process_notification(evt)
+    assert "[IMPORTANT: Background process proc_abc completed" in result
+    assert "exit code 0" in result
+    assert "Command: sleep 5" in result
+    assert "Output:\ndone]" in result
+
+
+def test_format_watch_match_event():
+    evt = {
+        "type": "watch_match",
+        "session_id": "proc_xyz",
+        "command": "tail -f log",
+        "pattern": "ERROR",
+        "output": "ERROR: disk full",
+        "suppressed": 0,
+    }
+    result = format_process_notification(evt)
+    assert 'watch pattern "ERROR"' in result
+    assert "Matched output:\nERROR: disk full" in result
+
+
+def test_format_watch_match_with_suppressed():
+    evt = {
+        "type": "watch_match",
+        "session_id": "proc_xyz",
+        "command": "tail -f log",
+        "pattern": "WARN",
+        "output": "WARN: low mem",
+        "suppressed": 3,
+    }
+    result = format_process_notification(evt)
+    assert "3 earlier matches were suppressed" in result
+
+
+def test_format_watch_disabled_event():
+    evt = {
+        "type": "watch_disabled",
+        "message": "Watch disabled for proc_xyz: too many matches",
+    }
+    result = format_process_notification(evt)
+    assert "[IMPORTANT: Watch disabled for proc_xyz" in result
+
+
+def test_format_returns_none_for_empty_event():
+    evt = {}
+    result = format_process_notification(evt)
+    assert result is not None
+    assert "unknown" in result
+
+
+def test_drain_notifications_returns_pending_events():
+    from tools.process_registry import process_registry
+
+    while not process_registry.completion_queue.empty():
+        process_registry.completion_queue.get_nowait()
+
+    process_registry.completion_queue.put({
+        "type": "completion",
+        "session_id": "proc_drain1",
+        "command": "echo hi",
+        "exit_code": 0,
+        "output": "hi",
+    })
+    process_registry.completion_queue.put({
+        "type": "watch_match",
+        "session_id": "proc_drain2",
+        "command": "tail -f x",
+        "pattern": "ERR",
+        "output": "ERR found",
+        "suppressed": 0,
+    })
+
+    try:
+        results = process_registry.drain_notifications()
+        assert len(results) == 2
+        assert results[0][0]["session_id"] == "proc_drain1"
+        assert "proc_drain1 completed" in results[0][1]
+        assert results[1][0]["session_id"] == "proc_drain2"
+        assert "watch pattern" in results[1][1]
+    finally:
+        while not process_registry.completion_queue.empty():
+            process_registry.completion_queue.get_nowait()
+        process_registry._completion_consumed.discard("proc_drain1")
+        process_registry._completion_consumed.discard("proc_drain2")
+
+
+def test_drain_notifications_skips_consumed():
+    from tools.process_registry import process_registry
+
+    while not process_registry.completion_queue.empty():
+        process_registry.completion_queue.get_nowait()
+
+    process_registry._completion_consumed.add("proc_consumed")
+    process_registry.completion_queue.put({
+        "type": "completion",
+        "session_id": "proc_consumed",
+        "command": "echo done",
+        "exit_code": 0,
+        "output": "done",
+    })
+
+    try:
+        results = process_registry.drain_notifications()
+        assert len(results) == 0
+    finally:
+        process_registry._completion_consumed.discard("proc_consumed")
+        while not process_registry.completion_queue.empty():
+            process_registry.completion_queue.get_nowait()
+
+
+def test_drain_notifications_empty_queue():
+    from tools.process_registry import process_registry
+
+    while not process_registry.completion_queue.empty():
+        process_registry.completion_queue.get_nowait()
+
+    results = process_registry.drain_notifications()
+    assert results == []
diff --git a/tools/process_registry.py b/tools/process_registry.py
index 405abc04a3c..184939adf75 100644
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@@ -826,6 +826,26 @@ class ProcessRegistry:
         """Check if a completion notification was already consumed via wait/poll/log."""
         return session_id in self._completion_consumed
 
+    def drain_notifications(self) -> "list[tuple[dict, str]]":
+        """Pop all pending notification events and return formatted pairs.
+
+        Returns a list of (raw_event, formatted_text) tuples.
+        Skips completion events that were already consumed via wait/poll/log.
+        """
+        results = []
+        while not self.completion_queue.empty():
+            try:
+                evt = self.completion_queue.get_nowait()
+            except Exception:
+                break
+            _evt_sid = evt.get("session_id", "")
+            if evt.get("type") == "completion" and self.is_completion_consumed(_evt_sid):
+                continue
+            text = format_process_notification(evt)
+            if text:
+                results.append((evt, text))
+        return results
+
     def get(self, session_id: str) -> Optional[ProcessSession]:
         """Get a session by ID (running or finished)."""
         with self._lock:
@@ -1388,6 +1408,44 @@ class ProcessRegistry:
 process_registry = ProcessRegistry()
 
 
+def format_process_notification(evt: dict) -> "str | None":
+    """Format a process notification event into a [IMPORTANT: ...] message.
+
+    Handles completion events (notify_on_complete), watch pattern matches,
+    and watch disabled events from the unified completion_queue.
+    """
+    evt_type = evt.get("type", "completion")
+    _sid = evt.get("session_id", "unknown")
+    _cmd = evt.get("command", "unknown")
+
+    if evt_type == "watch_disabled":
+        return f"[IMPORTANT: {evt.get('message', '')}]"
+
+    if evt_type == "watch_match":
+        _pat = evt.get("pattern", "?")
+        _out = evt.get("output", "")
+        _sup = evt.get("suppressed", 0)
+        text = (
+            f"[IMPORTANT: Background process {_sid} matched "
+            f"watch pattern \"{_pat}\".\n"
+            f"Command: {_cmd}\n"
+            f"Matched output:\n{_out}"
+        )
+        if _sup:
+            text += f"\n({_sup} earlier matches were suppressed by rate limit)"
+        text += "]"
+        return text
+
+    _exit = evt.get("exit_code", "?")
+    _out = evt.get("output", "")
+    return (
+        f"[IMPORTANT: Background process {_sid} completed "
+        f"(exit code {_exit}).\n"
+        f"Command: {_cmd}\n"
+        f"Output:\n{_out}]"
+    )
+
+
 # ---------------------------------------------------------------------------
 # Registry -- the "process" tool schema + handler
 # ---------------------------------------------------------------------------
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 230387ce23b..4a9bc2b6590 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -287,6 +287,9 @@ def _finalize_session(session: dict | None, end_reason: str = "tui_close") -> No
     if not session or session.get("_finalized"):
         return
     session["_finalized"] = True
+    stop_event = session.get("_notif_stop")
+    if stop_event is not None:
+        stop_event.set()
 
     agent = session.get("agent")
     lock = session.get("history_lock")
@@ -579,6 +582,7 @@ def _start_agent_build(sid: str, session: dict) -> None:
                 pass
 
             _wire_callbacks(sid)
+            _sessions[sid]["_notif_stop"] = _start_notification_poller(sid, _sessions[sid])
             _notify_session_boundary("on_session_reset", key)
 
             info = _session_info(agent)
@@ -1955,6 +1959,7 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80):
         # session startup resilient).
         pass
     _wire_callbacks(sid)
+    _sessions[sid]["_notif_stop"] = _start_notification_poller(sid, _sessions[sid])
     _notify_session_boundary("on_session_reset", key)
     _emit("session.info", sid, _session_info(agent))
 
@@ -3027,6 +3032,105 @@ def _(rid, params: dict) -> dict:
     return _ok(rid, {"status": "streaming"})
 
 
+def _notification_poller_loop(
+    stop_event: threading.Event, sid: str, session: dict
+) -> None:
+    """Poll completion_queue and dispatch notifications autonomously.
+
+    Runs in a daemon thread started by _init_session(). Emits a
+    status.update (kind=process) for user visibility, then chains an
+    agent turn via _run_prompt_submit if the session is idle.
+
+    NOTE: The completion_queue is global (one per process). If multiple
+    TUI sessions coexist, whichever poller wakes first grabs the event,
+    even if the process was started by a different session. This matches
+    CLI/gateway behavior (single session per process).
+    """
+    from tools.process_registry import process_registry, format_process_notification
+
+    while not stop_event.is_set() and not session.get("_finalized"):
+        try:
+            evt = process_registry.completion_queue.get(timeout=0.5)
+        except Exception:
+            continue
+
+        _evt_sid = evt.get("session_id", "")
+        if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
+            continue
+
+        text = format_process_notification(evt)
+        if not text:
+            continue
+
+        _emit("status.update", sid, {"kind": "process", "text": text})
+
+        with session["history_lock"]:
+            if session.get("running"):
+                process_registry.completion_queue.put(evt)
+                continue
+            session["running"] = True
+
+        rid = f"__notif__{int(time.time() * 1000)}"
+        try:
+            _emit("message.start", sid)
+            _run_prompt_submit(rid, sid, session, text)
+        except Exception as exc:
+            print(
+                f"[tui_gateway] notification poller dispatch failed: "
+                f"{type(exc).__name__}: {exc}",
+                file=sys.stderr,
+            )
+            with session["history_lock"]:
+                session["running"] = False
+
+    # Drain any remaining events after stop signal (process all pending
+    # before exiting so nothing is lost on shutdown).
+    while not process_registry.completion_queue.empty():
+        try:
+            evt = process_registry.completion_queue.get_nowait()
+        except Exception:
+            break
+        _evt_sid = evt.get("session_id", "")
+        if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
+            continue
+        text = format_process_notification(evt)
+        if not text:
+            continue
+
+        _emit("status.update", sid, {"kind": "process", "text": text})
+
+        with session["history_lock"]:
+            if session.get("running"):
+                process_registry.completion_queue.put(evt)
+                break
+            session["running"] = True
+
+        rid = f"__notif__{int(time.time() * 1000)}"
+        try:
+            _emit("message.start", sid)
+            _run_prompt_submit(rid, sid, session, text)
+        except Exception as exc:
+            print(
+                f"[tui_gateway] notification poller dispatch failed: "
+                f"{type(exc).__name__}: {exc}",
+                file=sys.stderr,
+            )
+            with session["history_lock"]:
+                session["running"] = False
+
+
+def _start_notification_poller(sid: str, session: dict) -> threading.Event:
+    """Start the background notification poller for a TUI session."""
+    stop = threading.Event()
+    t = threading.Thread(
+        target=_notification_poller_loop,
+        args=(stop, sid, session),
+        daemon=True,
+    )
+    t.start()
+    return stop
+
+
 def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
     with session["history_lock"]:
         history = list(session["history"])
@@ -3385,6 +3489,36 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
                 with session["history_lock"]:
                     session["running"] = False
 
+        # Drain completion notifications that arrived during this turn.
+        # The background poller handles between-turn delivery; this is
+        # the safety net for events that arrived mid-turn.
+        try:
+            from tools.process_registry import process_registry
+
+            for _evt, synth in process_registry.drain_notifications():
+                with session["history_lock"]:
+                    if session.get("running"):
+                        process_registry.completion_queue.put(_evt)
+                        break
+                    session["running"] = True
+                try:
+                    _emit("message.start", sid)
+                    _run_prompt_submit(rid, sid, session, synth)
+                except Exception as _n_exc:
+                    print(
+                        f"[tui_gateway] completion notification dispatch failed: "
+                        f"{type(_n_exc).__name__}: {_n_exc}",
+                        file=sys.stderr,
+                    )
+                    with session["history_lock"]:
+                        session["running"] = False
+        except Exception as _drain_exc:
+            print(
+                f"[tui_gateway] completion queue drain failed: "
+                f"{type(_drain_exc).__name__}: {_drain_exc}",
+                file=sys.stderr,
+            )
+
     threading.Thread(target=run, daemon=True).start()
 
 
From 9fb40e6a3d6338b6a6a616010de7a16672148924 Mon Sep 17 00:00:00 2001
From: brooklyn! <brooklyn.bb.nicholson@gmail.com>
Date: Fri, 15 May 2026 07:41:50 -0700
Subject: [PATCH 042/218] fix(tui): restrict fast-echo bypass to ASCII so
 Vietnamese/CJK/IME input renders correctly (#26011)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(tui): restrict fast-echo bypass to ASCII so Vietnamese/CJK/IME input renders correctly

The composer's fast-echo path (canFastAppend / canFastBackspace) writes
characters straight to stdout to skip an Ink re-render on the hot
typing path. The previous guard only checked
'stringWidth(text) === text.length', which lets a lot of non-ASCII
through:

  - Vietnamese precomposed letters (ề, ắ, ờ, ự, ...) report width 1 and
    length 1, but a Vietnamese Telex / IME stack produces them across
    multiple keystrokes; the intermediate composition state must be
    drawn by Ink so the rendered cell, the stored value, and the
    cursor column stay in lockstep when the final commit replaces the
    preview.
  - NFD combining marks (U+0300..U+036F) are zero-width but length 1,
    so even a passing equality lets them slip and silently desync the
    cell column.
  - CJK/East-Asian wide and emoji rejected only because their length
    differs, but the boundary was shape-shaped, not intent-shaped.

User-visible bug from the original report:
  Example: eê noiói nge neène
  -> the bypass committed the IME preview char before the diacritic
     replaced it, leaving doubled letters on screen.

Fix: gate fast-echo on pure printable ASCII (0x20-0x7e). The
performance-critical English typing path is unchanged; everything else
goes through the normal Ink render path so layout stays accurate.

Also extracts the shape preconditions as pure exported helpers
(canFastAppendShape / canFastBackspaceShape) so the regression matrix
is testable without spinning up a TextInput.

Tests: ui-tui/src/__tests__/textInputFastEcho.test.ts adds 20 cases
covering ASCII still works, Vietnamese precomposed + NFD, CJK, emoji,
NBSP / Latin-1, ANSI / control bytes, multi-line, and end-of-line
preconditions. Verified RED on the previous guard (11 of 20 fail) and
GREEN on the new guard.

Refs: #5221, #7443, #17602, #17603 (similar wide-char rendering bugs).

* docs(tui): clarify Vietnamese char terminology in regression comment

Address Copilot review: 'single byte width' implied UTF-8 byte semantics,
but the relevant property is JS code units (`text.length === 1`) and
display width (`stringWidth === 1`). Reworded to match.
---
 .../src/__tests__/textInputFastEcho.test.ts   | 136 ++++++++++++++++++
 ui-tui/src/components/textInput.tsx           | 101 ++++++++++---
 2 files changed, 218 insertions(+), 19 deletions(-)
 create mode 100644 ui-tui/src/__tests__/textInputFastEcho.test.ts

diff --git a/ui-tui/src/__tests__/textInputFastEcho.test.ts b/ui-tui/src/__tests__/textInputFastEcho.test.ts
new file mode 100644
index 00000000000..7f246f19f21
--- /dev/null
+++ b/ui-tui/src/__tests__/textInputFastEcho.test.ts
@@ -0,0 +1,136 @@
+import { describe, expect, it } from 'vitest'
+
+import { canFastAppendShape, canFastBackspaceShape } from '../components/textInput.js'
+
+// The fast-echo path bypasses Ink and writes characters directly to stdout
+// for the common case of typing plain English at the end of the line. These
+// tests pin the shape preconditions that make that bypass safe.
+//
+// Regression intent: any non-ASCII text — Vietnamese precomposed letters
+// (one grapheme, `text.length === 1`, `stringWidth === 1`, but produced
+// via IME composition across multiple keystrokes), combining marks
+// (zero width), CJK (double width), emoji (variable width), or anything
+// that could be produced by an in-flight IME composition — must NOT
+// take the bypass. Closes:
+//   - "TUI is experiencing font errors when using Unicode to type Vietnamese"
+//   - #5221  TUI input box renders incorrectly for CJK / East-Asian wide
+//   - #7443  CLI TUI renders and deletes Chinese characters incorrectly
+//   - #17602 / #17603  Chinese text scattering / ghosting
+
+describe('canFastAppendShape', () => {
+  const COLS = 40
+
+  it('accepts plain ASCII appended at end of single-line input', () => {
+    expect(canFastAppendShape('hello', 5, 'x', COLS, 5)).toBe(true)
+    expect(canFastAppendShape('hello', 5, ' world', COLS, 5)).toBe(true)
+  })
+
+  it('rejects when cursor is not at end of line', () => {
+    expect(canFastAppendShape('hello', 3, 'x', COLS, 5)).toBe(false)
+  })
+
+  it('rejects when current is empty (placeholder render path needed)', () => {
+    expect(canFastAppendShape('', 0, 'x', COLS, 0)).toBe(false)
+  })
+
+  it('rejects when current contains a newline (multi-line layout)', () => {
+    expect(canFastAppendShape('hi\nthere', 8, 'x', COLS, 5)).toBe(false)
+  })
+
+  it('rejects when appending would hit the wrap column', () => {
+    // Reaching cols on append must trigger a wrap, which the bypass
+    // cannot draw. Stay strictly below cols.
+    expect(canFastAppendShape('hello', 5, 'x', 6, 5)).toBe(false)
+  })
+
+  // -- Regression coverage: Vietnamese / combining marks / IME --
+
+  it('rejects Vietnamese precomposed letter ề (U+1EC1) — IME composition path', () => {
+    // 'ề' is one grapheme, length 1, width 1, but Vietnamese Telex/IME
+    // produces it via a multi-key composition. Fast-echo would commit the
+    // intermediate state to stdout and desync once the final commit
+    // arrives.
+    expect(canFastAppendShape('hello', 5, 'ề', COLS, 5)).toBe(false)
+  })
+
+  it('rejects Vietnamese tone marks ă, ơ, ư (Latin-Extended-A/B)', () => {
+    for (const ch of ['ă', 'ắ', 'ơ', 'ờ', 'ư', 'ự']) {
+      expect(canFastAppendShape('hello', 5, ch, COLS, 5)).toBe(false)
+    }
+  })
+
+  it('rejects NFD combining marks (U+0300 grave, U+0301 acute, U+0302 circumflex)', () => {
+    // Decomposed Vietnamese: 'e' + combining circumflex + combining grave
+    // = 'ề'. Each combining mark is zero-width but length 1; without the
+    // ASCII guard the second/third keypress would be fast-echoed and
+    // desync the cell column.
+    expect(canFastAppendShape('hello', 5, '\u0300', COLS, 5)).toBe(false)
+    expect(canFastAppendShape('hello', 5, '\u0301', COLS, 5)).toBe(false)
+    expect(canFastAppendShape('hello', 5, '\u0302', COLS, 5)).toBe(false)
+  })
+
+  it('rejects CJK (East-Asian wide) characters', () => {
+    expect(canFastAppendShape('hello', 5, '你', COLS, 5)).toBe(false)
+    expect(canFastAppendShape('hello', 5, '日本', COLS, 5)).toBe(false)
+  })
+
+  it('rejects emoji', () => {
+    expect(canFastAppendShape('hello', 5, '🙂', COLS, 5)).toBe(false)
+  })
+
+  it('rejects ANSI-bearing or control text', () => {
+    expect(canFastAppendShape('hello', 5, '\x1b[31m', COLS, 5)).toBe(false)
+    expect(canFastAppendShape('hello', 5, '\t', COLS, 5)).toBe(false)
+    expect(canFastAppendShape('hello', 5, '\x7f', COLS, 5)).toBe(false)
+  })
+
+  it('rejects NBSP and Latin-1 letters that would change the line shape', () => {
+    expect(canFastAppendShape('hello', 5, '\u00a0', COLS, 5)).toBe(false)
+    expect(canFastAppendShape('hello', 5, 'é', COLS, 5)).toBe(false)
+    expect(canFastAppendShape('hello', 5, 'ñ', COLS, 5)).toBe(false)
+  })
+})
+
+describe('canFastBackspaceShape', () => {
+  it('accepts deleting the last ASCII char', () => {
+    expect(canFastBackspaceShape('hello', 5)).toBe(true)
+  })
+
+  it('rejects when cursor is not at end', () => {
+    expect(canFastBackspaceShape('hello', 3)).toBe(false)
+  })
+
+  it('rejects when there is nothing to delete', () => {
+    expect(canFastBackspaceShape('', 0)).toBe(false)
+    expect(canFastBackspaceShape('hello', 0)).toBe(false)
+  })
+
+  it('rejects when value contains a newline', () => {
+    expect(canFastBackspaceShape('hi\nthere', 8)).toBe(false)
+  })
+
+  it('rejects deleting Vietnamese precomposed letter ề', () => {
+    // The "\b \b" shortcut clears one terminal cell; that's fine for a
+    // 1-cell ASCII char but if the previous grapheme is a Vietnamese
+    // letter that the IME may still be holding open, we want Ink to
+    // re-render so composition state stays consistent.
+    expect(canFastBackspaceShape('helloề', 'helloề'.length)).toBe(false)
+  })
+
+  it('rejects deleting a CJK character (2 cells)', () => {
+    expect(canFastBackspaceShape('hi你', 'hi你'.length)).toBe(false)
+  })
+
+  it('rejects deleting a NFD-composed grapheme with combining marks', () => {
+    // 'e' + U+0302 (circumflex) + U+0300 (grave) — final grapheme is one
+    // cluster but the previous-grapheme slice is multi-codepoint. Width
+    // is 1 but the bypass would be unsafe because the rendered cell
+    // already contained the combined glyph.
+    const s = 'hello' + 'e\u0302\u0300'
+    expect(canFastBackspaceShape(s, s.length)).toBe(false)
+  })
+
+  it('rejects deleting an emoji', () => {
+    expect(canFastBackspaceShape('hi🙂', 'hi🙂'.length)).toBe(false)
+  })
+})
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index 0c63ceb93c8..91e109fa366 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -179,6 +179,84 @@ export function lineNav(s: string, p: number, dir: -1 | 1): null | number {
 
 export { offsetFromPosition }
 
+const ASCII_PRINTABLE_RE = /^[\x20-\x7e]+$/
+
+/**
+ * Pure shape-only precondition for the fast-echo append path.
+ *
+ * The fast-echo path bypasses Ink's renderer and writes text directly to
+ * stdout, so the stored value, the rendered terminal cells, and the cursor
+ * column must all stay in sync without any layout work. We only allow it
+ * when the inserted text is pure printable ASCII so that:
+ *
+ *   - `text.length` matches the number of grapheme clusters (no combining
+ *     marks, no surrogate pairs, no precomposed CJK / Latin-Extended
+ *     letters that an IME might still be holding open as a composition),
+ *   - terminal width is exactly 1 cell per character (no East-Asian wide,
+ *     no zero-width, no ambiguous-width fonts),
+ *   - input methods (Vietnamese Telex, IME, dead-keys) cannot leak
+ *     intermediate composition bytes through the bypass before the final
+ *     commit arrives — those always go through the normal Ink render path
+ *     and stay layout-accurate (closes #5221, #7443, #17602/#17603).
+ *
+ * We deliberately do NOT just check `stringWidth(text) === text.length`:
+ * Vietnamese precomposed letters like "ề" (U+1EC1) report width 1 and
+ * length 1 but are still produced by IME compositions and must not be
+ * fast-echoed.
+ */
+export function canFastAppendShape(
+  current: string,
+  cursor: number,
+  text: string,
+  columns: number,
+  currentLineWidth: number
+): boolean {
+  if (cursor !== current.length) {
+    return false
+  }
+
+  if (current.length === 0) {
+    return false
+  }
+
+  if (current.includes('\n')) {
+    return false
+  }
+
+  if (!ASCII_PRINTABLE_RE.test(text)) {
+    return false
+  }
+
+  return currentLineWidth + text.length < Math.max(1, columns)
+}
+
+/**
+ * Pure shape-only precondition for the fast-echo backspace path.
+ *
+ * Same reasoning as canFastAppendShape — only allow the direct
+ * "\b \b" stdout shortcut when the deleted grapheme is pure printable
+ * ASCII. Anything else (combining marks, IME compositions, wide chars,
+ * tabs, ANSI fragments) goes through the normal render path so Ink can
+ * recompute cell widths.
+ */
+export function canFastBackspaceShape(current: string, cursor: number): boolean {
+  if (cursor !== current.length) {
+    return false
+  }
+
+  if (cursor <= 0) {
+    return false
+  }
+
+  if (current.includes('\n')) {
+    return false
+  }
+
+  const removed = current.slice(prevPos(current, cursor), cursor)
+
+  return ASCII_PRINTABLE_RE.test(removed)
+}
+
 function renderWithCursor(value: string, cursor: number) {
   const pos = Math.max(0, Math.min(cursor, value.length))
 
@@ -444,26 +522,11 @@ export function TextInput({
 
   const canFastEchoBase = () => focus && termFocus && !selected && !mask && !!stdout?.isTTY
 
-  const canFastAppend = (current: string, cursor: number, text: string) => {
-    const sw = stringWidth(text)
+  const canFastAppend = (current: string, cursor: number, text: string) =>
+    canFastEchoBase() && canFastAppendShape(current, cursor, text, columns, lineWidthRef.current)
 
-    return (
-      canFastEchoBase() &&
-      cursor === current.length &&
-      current.length > 0 &&
-      !current.includes('\n') &&
-      sw === text.length &&
-      lineWidthRef.current + sw < Math.max(1, columns)
-    )
-  }
-
-  const canFastBackspace = (current: string, cursor: number) => {
-    if (!canFastEchoBase() || cursor !== current.length || cursor <= 0 || current.includes('\n')) {
-      return false
-    }
-
-    return stringWidth(current.slice(prevPos(current, cursor), cursor)) === 1
-  }
+  const canFastBackspace = (current: string, cursor: number) =>
+    canFastEchoBase() && canFastBackspaceShape(current, cursor)
 
   const commit = (
     next: string,

From b62c9979732c732480491c63a4399034f668a44f Mon Sep 17 00:00:00 2001
From: Jaaneek <Jaaneek@users.noreply.github.com>
Date: Fri, 15 May 2026 16:10:38 +0100
Subject: [PATCH 043/218] feat(xai-oauth): add xAI Grok OAuth (SuperGrok
 Subscription) provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a new authentication provider that lets SuperGrok subscribers sign
in to Hermes with their xAI account via the standard OAuth 2.0 PKCE
loopback flow, instead of pasting a raw API key from console.x.ai.

Highlights
----------
* OAuth 2.0 PKCE loopback login against accounts.x.ai with discovery,
  state/nonce, and a strict CORS-origin allowlist on the callback.
* Authorize URL carries `plan=generic` (required for non-allowlisted
  loopback clients) and `referrer=hermes-agent` for best-effort
  attribution in xAI's OAuth server logs.
* Token storage in `auth.json` with file-locked atomic writes; JWT
  `exp`-based expiry detection with skew; refresh-token rotation
  synced both ways between the singleton store and the credential
  pool so multi-process / multi-profile setups don't tear each other's
  refresh tokens.
* Reactive 401 retry: on a 401 from the xAI Responses API, the agent
  refreshes the token, swaps it back into `self.api_key`, and retries
  the call once. Guarded against silent account swaps when the active
  key was sourced from a different (manual) pool entry.
* Auxiliary tasks (curator, vision, embeddings, etc.) route through a
  dedicated xAI Responses-mode auxiliary client instead of falling back
  to OpenRouter billing.
* Direct HTTP tools (`tools/xai_http.py`, transcription, TTS, image-gen
  plugin) resolve credentials through a unified runtime → singleton →
  env-var fallback chain so xai-oauth users get them for free.
* `hermes auth add xai-oauth` and `hermes auth remove xai-oauth N` are
  wired through the standard auth-commands surface; remove cleans up
  the singleton loopback_pkce entry so it doesn't silently reinstate.
* `hermes model` provider picker shows
  "xAI Grok OAuth (SuperGrok Subscription)" and the model-flow falls
  back to pool credentials when the singleton is missing.

Hardening
---------
* Discovery and refresh responses validate the returned
  `token_endpoint` host against the same `*.x.ai` allowlist as the
  authorization endpoint, blocking MITM persistence of a hostile
  endpoint.
* Discovery / refresh / token-exchange `response.json()` calls are
  wrapped to raise typed `AuthError` on malformed bodies (captive
  portals, proxy error pages) instead of leaking JSONDecodeError
  tracebacks.
* `prompt_cache_key` is routed through `extra_body` on the codex
  transport (sending it as a top-level kwarg trips xAI's SDK with a
  TypeError).
* Credential-pool sync-back preserves `active_provider` so refreshing
  an OAuth entry doesn't silently flip the active provider out from
  under the running agent.

Testing
-------
* New `tests/hermes_cli/test_auth_xai_oauth_provider.py` (~63 tests)
  covers JWT expiry, OAuth URL params (plan + referrer), CORS origins,
  redirect URI validation, singleton↔pool sync, concurrency races,
  refresh error paths, runtime resolution, and malformed-JSON guards.
* Extended `test_credential_pool.py`, `test_codex_transport.py`, and
  `test_run_agent_codex_responses.py` cover the pool sync-back,
  `extra_body` routing, and 401 reactive refresh paths.
* 165 tests passing on this branch via `scripts/run_tests.sh`.
---
 agent/auxiliary_client.py                     |   72 +
 agent/codex_responses_adapter.py              |   15 +-
 agent/credential_pool.py                      |  184 +-
 agent/credential_sources.py                   |   30 +
 agent/transports/codex.py                     |   31 +-
 hermes_cli/auth.py                            |  806 ++++++++-
 hermes_cli/auth_commands.py                   |   31 +-
 hermes_cli/main.py                            |   89 +-
 hermes_cli/models.py                          |   43 +-
 hermes_cli/providers.py                       |   10 +
 hermes_cli/runtime_provider.py                |   23 +
 hermes_cli/setup.py                           |  116 +-
 hermes_cli/tools_config.py                    |   74 +-
 plugins/image_gen/xai/__init__.py             |   51 +-
 plugins/video_gen/xai/__init__.py             |   97 +-
 run_agent.py                                  |   78 +-
 .../agent/transports/test_codex_transport.py  |   43 +
 .../test_auth_xai_oauth_provider.py           | 1605 +++++++++++++++++
 tests/plugins/image_gen/test_xai_provider.py  |    9 +-
 tests/plugins/video_gen/test_xai_plugin.py    |   44 +
 .../test_run_agent_codex_responses.py         |  205 ++-
 tools/transcription_tools.py                  |   31 +-
 tools/tts_tool.py                             |   19 +-
 tools/xai_http.py                             |   49 +
 website/docs/guides/xai-grok-oauth.md         |  214 +++
 website/docs/integrations/providers.md        |    4 +-
 website/sidebars.ts                           |    1 +
 27 files changed, 3843 insertions(+), 131 deletions(-)
 create mode 100644 tests/hermes_cli/test_auth_xai_oauth_provider.py
 create mode 100644 website/docs/guides/xai-grok-oauth.md

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 96ad615bf6f..cd655e70e56 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1254,6 +1254,30 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
     return api_key, base_url
 
 
+def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]:
+    """Resolve a fresh xAI OAuth (api_key, base_url) for auxiliary clients.
+
+    Routes through ``hermes_cli.auth``'s runtime resolver so the auto-refresh
+    path is shared with the main agent, instead of relying on whatever raw
+    tokens happen to be sitting in auth.json or the credential pool.  Returns
+    ``None`` if the user is not authenticated with xAI Grok OAuth (so
+    ``_resolve_auto`` Step 1 falls through to the next provider in the chain).
+    """
+    try:
+        from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
+
+        creds = resolve_xai_oauth_runtime_credentials()
+    except Exception as exc:
+        logger.debug("Auxiliary xAI OAuth runtime credential resolution failed: %s", exc)
+        return None
+
+    api_key = str(creds.get("api_key") or "").strip()
+    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
+    if not api_key or not base_url:
+        return None
+    return api_key, base_url
+
+
 def _read_codex_access_token() -> Optional[str]:
     """Read a valid, non-expired Codex OAuth access token from Hermes auth store.
 
@@ -1744,6 +1768,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
     return _fallback_client, model
 
 
+def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
+    """Build a CodexAuxiliaryClient for an xAI Grok OAuth-authenticated session.
+
+    xAI's ``/v1/responses`` endpoint speaks the OpenAI Responses API, so we
+    wrap a plain ``OpenAI`` client in ``CodexAuxiliaryClient`` to translate
+    ``chat.completions.create()`` calls into ``responses.stream()`` requests.
+
+    The caller must pass an explicit model — pinning a default for Grok
+    would silently rot when xAI's allowlist drifts.  Returns ``(None, None)``
+    when the user has not authenticated with xAI Grok OAuth.
+    """
+    if not model:
+        logger.warning(
+            "Auxiliary client: xai-oauth requested without a model; "
+            "pass model explicitly (auxiliary.<task>.model in config.yaml)."
+        )
+        return None, None
+    resolved = _resolve_xai_oauth_for_aux()
+    if resolved is None:
+        return None, None
+    api_key, base_url = resolved
+    logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
+    real_client = OpenAI(api_key=api_key, base_url=base_url)
+    return CodexAuxiliaryClient(real_client, model), model
+
+
 def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
     """Build a CodexAuxiliaryClient for an explicitly-requested model.
 
@@ -2851,6 +2901,26 @@ def resolve_provider_client(
         return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                 else (client, final_model))
 
+    # ── xAI Grok OAuth (loopback PKCE → Responses API) ───────────────
+    # Without this branch, an xai-oauth main provider falls through to the
+    # generic ``oauth_external`` arm below and returns ``(None, None)``,
+    # silently re-routing every auxiliary task (compression, web extract,
+    # session search, curator, etc.) to whatever Step-2 fallback the user
+    # has configured.  Users on xAI Grok OAuth would then see surprise
+    # OpenRouter / Nous bills for side tasks they thought were running on
+    # their xAI subscription.
+    if provider == "xai-oauth":
+        client, default = _build_xai_oauth_aux_client(model)
+        if client is None:
+            logger.warning(
+                "resolve_provider_client: xai-oauth requested but no xAI "
+                "OAuth token found (run: hermes model -> xAI Grok OAuth — SuperGrok Subscription)"
+            )
+            return None, None
+        final_model = _normalize_resolved_model(model or default, provider)
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                else (client, final_model))
+
     # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
     if provider == "custom":
         if explicit_base_url:
@@ -3201,6 +3271,8 @@ def resolve_provider_client(
             return resolve_provider_client("nous", model, async_mode)
         if provider == "openai-codex":
             return resolve_provider_client("openai-codex", model, async_mode)
+        if provider == "xai-oauth":
+            return resolve_provider_client("xai-oauth", model, async_mode)
         # Other OAuth providers not directly supported
         logger.warning("resolve_provider_client: OAuth provider %s not "
                        "directly supported, try 'auto'", provider)
diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index ef4119ceb89..00345f054e8 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -726,7 +726,7 @@ def _preflight_codex_api_kwargs(
         "model", "instructions", "input", "tools", "store",
         "reasoning", "include", "max_output_tokens", "temperature",
         "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
-        "extra_headers",
+        "extra_headers", "extra_body",
     }
     normalized: Dict[str, Any] = {
         "model": model,
@@ -776,6 +776,19 @@ def _preflight_codex_api_kwargs(
         if normalized_headers:
             normalized["extra_headers"] = normalized_headers
 
+    extra_body = api_kwargs.get("extra_body")
+    if extra_body is not None:
+        if not isinstance(extra_body, dict):
+            raise ValueError("Codex Responses request 'extra_body' must be an object.")
+        # Pass extra_body through verbatim — used by xAI Responses to
+        # carry `prompt_cache_key` as a body-level field (the documented
+        # cache-routing surface on /v1/responses). The openai SDK
+        # serializes extra_body into the JSON body without per-field
+        # type checks, so it survives Responses.stream() kwarg-signature
+        # changes that would otherwise raise TypeError before the wire.
+        if extra_body:
+            normalized["extra_body"] = dict(extra_body)
+
     if allow_stream:
         stream = api_kwargs.get("stream")
         if stream is not None and stream is not True:
diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index aeda76225c8..504742145c1 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -29,6 +29,7 @@ from hermes_cli.auth import (
     _resolve_zai_base_url,
     _save_auth_store,
     _save_provider_state,
+    _store_provider_state,
     read_credential_pool,
     write_credential_pool,
 )
@@ -539,6 +540,64 @@ class CredentialPool:
             logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
         return entry
 
+    def _sync_xai_oauth_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Sync an xAI OAuth pool entry from auth.json if tokens differ.
+
+        xAI OAuth refresh tokens are single-use.  When another Hermes process
+        (or another profile sharing the same auth.json) refreshes the token,
+        it writes the new pair to ``providers["xai-oauth"]["tokens"]`` under
+        ``_auth_store_lock``.  Without this resync, our in-memory pool entry
+        keeps the consumed refresh_token and the next ``_refresh_entry`` call
+        would replay it and get a ``refresh_token_reused``-style 4xx.
+
+        Only applies to entries seeded from the singleton (``loopback_pkce``);
+        manually added entries (``manual:xai_pkce``) are independent
+        credentials with their own refresh-token lifecycle.
+        """
+        if self.provider != "xai-oauth" or entry.source != "loopback_pkce":
+            return entry
+        try:
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                state = _load_provider_state(auth_store, "xai-oauth")
+            if not isinstance(state, dict):
+                return entry
+            tokens = state.get("tokens")
+            if not isinstance(tokens, dict):
+                return entry
+            store_access = tokens.get("access_token", "")
+            store_refresh = tokens.get("refresh_token", "")
+            entry_access = entry.access_token or ""
+            entry_refresh = entry.refresh_token or ""
+            if store_access and (
+                store_access != entry_access
+                or (store_refresh and store_refresh != entry_refresh)
+            ):
+                logger.debug(
+                    "Pool entry %s: syncing xAI OAuth tokens from auth.json "
+                    "(refreshed by another process)",
+                    entry.id,
+                )
+                field_updates: Dict[str, Any] = {
+                    "access_token": store_access,
+                    "refresh_token": store_refresh or entry.refresh_token,
+                    "last_status": None,
+                    "last_status_at": None,
+                    "last_error_code": None,
+                    "last_error_reason": None,
+                    "last_error_message": None,
+                    "last_error_reset_at": None,
+                }
+                if state.get("last_refresh"):
+                    field_updates["last_refresh"] = state["last_refresh"]
+                updated = replace(entry, **field_updates)
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            logger.debug("Failed to sync xAI OAuth entry from auth.json: %s", exc)
+        return entry
+
     def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
         """Sync a Nous pool entry from auth.json if tokens differ.
 
@@ -604,9 +663,22 @@ class CredentialPool:
         re-seeding a consumed single-use refresh token.
 
         Applies to any OAuth provider whose singleton lives in auth.json
-        (currently Nous and OpenAI Codex).
+        (currently Nous, OpenAI Codex, and xAI Grok OAuth).
+
+        ``set_active=False`` on every write: a pool sync-back is a
+        token-rotation side effect, not the user choosing a provider.
+        Using ``_save_provider_state`` (which sets ``active_provider``)
+        here would mean every Nous/Codex/xAI refresh in a multi-provider
+        setup silently flips the ``active_provider`` flag — the next
+        ``hermes`` invocation that defaults to the active provider
+        (e.g. setup wizard, ``hermes auth status``) would land on
+        whatever provider happened to refresh last, not whatever the
+        user actually chose.
         """
-        if entry.source != "device_code":
+        # Only sync entries that were seeded *from* a singleton.  Manually
+        # added pool entries (source="manual:*") are independent credentials
+        # and must not write back to the singleton.
+        if entry.source not in {"device_code", "loopback_pkce"}:
             return
         try:
             with _auth_store_lock():
@@ -632,7 +704,7 @@ class CredentialPool:
                             state[extra_key] = val
                     if entry.inference_base_url:
                         state["inference_base_url"] = entry.inference_base_url
-                    _save_provider_state(auth_store, "nous", state)
+                    _store_provider_state(auth_store, "nous", state, set_active=False)
 
                 elif self.provider == "openai-codex":
                     state = _load_provider_state(auth_store, "openai-codex")
@@ -646,7 +718,21 @@ class CredentialPool:
                         tokens["refresh_token"] = entry.refresh_token
                     if entry.last_refresh:
                         state["last_refresh"] = entry.last_refresh
-                    _save_provider_state(auth_store, "openai-codex", state)
+                    _store_provider_state(auth_store, "openai-codex", state, set_active=False)
+
+                elif self.provider == "xai-oauth":
+                    state = _load_provider_state(auth_store, "xai-oauth")
+                    if not isinstance(state, dict):
+                        return
+                    tokens = state.get("tokens")
+                    if not isinstance(tokens, dict):
+                        return
+                    tokens["access_token"] = entry.access_token
+                    if entry.refresh_token:
+                        tokens["refresh_token"] = entry.refresh_token
+                    if entry.last_refresh:
+                        state["last_refresh"] = entry.last_refresh
+                    _store_provider_state(auth_store, "xai-oauth", state, set_active=False)
 
                 else:
                     return
@@ -699,6 +785,25 @@ class CredentialPool:
                     refresh_token=refreshed["refresh_token"],
                     last_refresh=refreshed.get("last_refresh"),
                 )
+            elif self.provider == "xai-oauth":
+                # Adopt fresher tokens from auth.json before spending the
+                # refresh_token — single-use tokens consumed by another
+                # process (or another profile sharing the singleton) would
+                # otherwise trigger ``refresh_token_reused`` on the next
+                # POST.  Only meaningful for singleton-seeded entries.
+                synced = self._sync_xai_oauth_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                refreshed = auth_mod.refresh_xai_oauth_pure(
+                    entry.access_token,
+                    entry.refresh_token,
+                )
+                updated = replace(
+                    entry,
+                    access_token=refreshed["access_token"],
+                    refresh_token=refreshed["refresh_token"],
+                    last_refresh=refreshed.get("last_refresh"),
+                )
             elif self.provider == "nous":
                 synced = self._sync_nous_entry_from_auth_store(entry)
                 if synced is not entry:
@@ -777,6 +882,30 @@ class CredentialPool:
                     # Credentials file had a valid (non-expired) token — use it directly
                     logger.debug("Credentials file has valid token, using without refresh")
                     return synced
+            # For xai-oauth: same race as nous — another process may have
+            # consumed the refresh token between our proactive sync and the
+            # HTTP call.  Re-check auth.json and adopt the fresh tokens if
+            # they have rotated since.  Only meaningful for singleton-seeded
+            # (loopback_pkce) entries; manual entries don't share state with
+            # the singleton.
+            if self.provider == "xai-oauth":
+                synced = self._sync_xai_oauth_entry_from_auth_store(entry)
+                if synced.refresh_token != entry.refresh_token:
+                    logger.debug(
+                        "xAI OAuth refresh failed but auth.json has newer tokens — adopting"
+                    )
+                    updated = replace(
+                        synced,
+                        last_status=STATUS_OK,
+                        last_status_at=None,
+                        last_error_code=None,
+                        last_error_reason=None,
+                        last_error_message=None,
+                        last_error_reset_at=None,
+                    )
+                    self._replace_entry(synced, updated)
+                    self._persist()
+                    return updated
             # For nous: another process may have consumed the refresh token
             # between our proactive sync and the HTTP call.  Re-sync from
             # auth.json and adopt the fresh tokens if available.
@@ -829,6 +958,11 @@ class CredentialPool:
                 entry.access_token,
                 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
             )
+        if self.provider == "xai-oauth":
+            return auth_mod._xai_access_token_is_expiring(
+                entry.access_token,
+                auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+            )
         if self.provider == "nous":
             # Nous refresh/mint can require network access and should happen when
             # runtime credentials are actually resolved, not merely when the pool
@@ -883,6 +1017,17 @@ class CredentialPool:
                 if synced is not entry:
                     entry = synced
                     cleared_any = True
+            # For xai-oauth singleton-seeded entries, identical pattern:
+            # an entry frozen as exhausted may simply be holding stale
+            # tokens that another process (or a fresh `hermes model` ->
+            # xAI Grok OAuth login) has since rotated in auth.json.
+            if (self.provider == "xai-oauth"
+                    and entry.source == "loopback_pkce"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_xai_oauth_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
             if entry.last_status == STATUS_EXHAUSTED:
                 exhausted_until = _exhausted_until(entry)
                 if exhausted_until is not None and now < exhausted_until:
@@ -1394,6 +1539,37 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                 },
             )
 
+    elif provider == "xai-oauth":
+        # When the user logs in via ``hermes model`` -> xAI Grok OAuth,
+        # tokens are written to the auth.json singleton
+        # (``providers["xai-oauth"]``).  Surface them in the pool too so
+        # ``hermes auth list`` reflects the logged-in state and so the pool
+        # is the single source of truth for refresh during runtime resolution.
+        if _is_suppressed(provider, "loopback_pkce"):
+            return changed, active_sources
+
+        state = _load_provider_state(auth_store, "xai-oauth")
+        tokens = state.get("tokens") if isinstance(state, dict) else None
+        if isinstance(tokens, dict) and tokens.get("access_token"):
+            active_sources.add("loopback_pkce")
+            from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
+
+            base_url = DEFAULT_XAI_OAUTH_BASE_URL
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                "loopback_pkce",
+                {
+                    "source": "loopback_pkce",
+                    "auth_type": AUTH_TYPE_OAUTH,
+                    "access_token": tokens.get("access_token", ""),
+                    "refresh_token": tokens.get("refresh_token"),
+                    "base_url": base_url,
+                    "last_refresh": state.get("last_refresh"),
+                    "label": label_from_token(tokens.get("access_token", ""), "loopback_pkce"),
+                },
+            )
+
     return changed, active_sources
 
 
diff --git a/agent/credential_sources.py b/agent/credential_sources.py
index 74204919248..ee035426023 100644
--- a/agent/credential_sources.py
+++ b/agent/credential_sources.py
@@ -265,6 +265,31 @@ def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
     return result
 
 
+def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult:
+    """xAI OAuth tokens live in auth.json providers.xai-oauth — clear them.
+
+    Without this step, ``hermes auth remove xai-oauth <N>`` silently undoes
+    itself: the central dispatcher only removes the in-memory pool entry,
+    leaves ``providers.xai-oauth`` in auth.json intact, and on the next
+    ``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the
+    entry from the still-present singleton — credentials reappear with no
+    user feedback. Clearing the singleton in step with the suppression set
+    by the central dispatcher makes the removal stick.
+
+    Belt-and-braces against the manual entry path: ``hermes auth add
+    xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step
+    falls through to "unregistered → nothing to clean up" (correct —
+    manual entries are pool-only).
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    result.hints.append(
+        "Run `hermes model` → xAI Grok OAuth (SuperGrok Subscription) to re-authenticate if needed."
+    )
+    return result
+
+
 def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
     """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
 
@@ -397,6 +422,11 @@ def _register_all_sources() -> None:
         remove_fn=_remove_codex_device_code,
         description="auth.json providers.openai-codex + ~/.codex/auth.json",
     ))
+    register(RemovalStep(
+        provider="xai-oauth", source_id="loopback_pkce",
+        remove_fn=_remove_xai_oauth_loopback_pkce,
+        description="auth.json providers.xai-oauth",
+    ))
     register(RemovalStep(
         provider="qwen-oauth", source_id="qwen-cli",
         remove_fn=_remove_qwen_cli,
diff --git a/agent/transports/codex.py b/agent/transports/codex.py
index 6738ed3220c..46169e971ba 100644
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -89,18 +89,25 @@ class ResponsesApiTransport(ProviderTransport):
         _effort_clamp = {"minimal": "low"}
         reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
 
+        response_tools = _responses_tools(tools)
         kwargs = {
             "model": model,
             "instructions": instructions,
             "input": _chat_messages_to_responses_input(payload_messages),
-            "tools": _responses_tools(tools),
-            "tool_choice": "auto",
-            "parallel_tool_calls": True,
+            "tools": response_tools,
             "store": False,
         }
+        if response_tools:
+            kwargs["tool_choice"] = "auto"
+            kwargs["parallel_tool_calls"] = True
 
         session_id = params.get("session_id")
-        if not is_github_responses and session_id:
+        # xAI's Responses API uses `prompt_cache_key` (body-level) as the
+        # cache-routing key, not a top-level kwarg — the body-field
+        # injection below survives openai SDK builds whose
+        # Responses.stream() signature drops the kwarg. Everything else
+        # that ISN'T github/xAI keeps using the typed kwarg.
+        if not is_github_responses and not is_xai_responses and session_id:
             kwargs["prompt_cache_key"] = session_id
 
         if reasoning_enabled and is_xai_responses:
@@ -165,6 +172,22 @@ class ResponsesApiTransport(ProviderTransport):
             merged_extra_headers["x-grok-conv-id"] = session_id
             kwargs["extra_headers"] = merged_extra_headers
 
+            # xAI Responses cache-routing field. Lives in the request body
+            # (per https://docs.x.ai/.../prompt-caching/maximizing-cache-hits),
+            # so we ship it via extra_body — the openai SDK serializes
+            # extra_body fields into the JSON body without per-field type
+            # validation, sidestepping the TypeError that fires on
+            # Responses.stream() builds whose `prompt_cache_key` kwarg has
+            # been dropped. Setdefault preserves a caller-supplied value
+            # (e.g. request_overrides.extra_body.prompt_cache_key) over
+            # the auto-derived session_id.
+            existing_extra_body = kwargs.get("extra_body")
+            merged_extra_body: Dict[str, Any] = {}
+            if isinstance(existing_extra_body, dict):
+                merged_extra_body.update(existing_extra_body)
+            merged_extra_body.setdefault("prompt_cache_key", session_id)
+            kwargs["extra_body"] = merged_extra_body
+
         return kwargs
 
     def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 2dcf6a03b45..8749cd9461c 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -72,6 +72,7 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+DEFAULT_XAI_OAUTH_BASE_URL = "https://api.x.ai/v1"
 MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"
 MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"
 MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"
@@ -89,6 +90,14 @@ STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
+XAI_OAUTH_ISSUER = "https://auth.x.ai"
+XAI_OAUTH_DISCOVERY_URL = f"{XAI_OAUTH_ISSUER}/.well-known/openid-configuration"
+XAI_OAUTH_CLIENT_ID = "b1a00492-073a-47ea-816f-4c329264a828"
+XAI_OAUTH_SCOPE = "openid profile email offline_access grok-cli:access api:access"
+XAI_OAUTH_REDIRECT_HOST = "127.0.0.1"
+XAI_OAUTH_REDIRECT_PORT = 56121
+XAI_OAUTH_REDIRECT_PATH = "/callback"
+XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
 QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
 QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
 QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -162,6 +171,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
         auth_type="oauth_external",
         inference_base_url=DEFAULT_CODEX_BASE_URL,
     ),
+    "xai-oauth": ProviderConfig(
+        id="xai-oauth",
+        name="xAI Grok OAuth (SuperGrok Subscription)",
+        auth_type="oauth_external",
+        inference_base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+    ),
     "qwen-oauth": ProviderConfig(
         id="qwen-oauth",
         name="Qwen OAuth",
@@ -1364,6 +1379,8 @@ def resolve_provider(
         "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
         "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
         "x-ai": "xai", "x.ai": "xai", "grok": "xai",
+        "xai-oauth": "xai-oauth", "x-ai-oauth": "xai-oauth",
+        "grok-oauth": "xai-oauth", "xai-grok-oauth": "xai-oauth",
         "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
         "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
         "step": "stepfun", "stepfun-coding-plan": "stepfun",
@@ -1907,6 +1924,16 @@ def _spotify_code_challenge(code_verifier: str) -> str:
     return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")
 
 
+def _oauth_pkce_code_verifier(length: int = 64) -> str:
+    raw = base64.urlsafe_b64encode(os.urandom(length)).decode("ascii")
+    return raw.rstrip("=")[:128]
+
+
+def _oauth_pkce_code_challenge(code_verifier: str) -> str:
+    digest = hashlib.sha256(code_verifier.encode("utf-8")).digest()
+    return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")
+
+
 def _spotify_build_authorize_url(
     *,
     client_id: str,
@@ -2029,6 +2056,158 @@ def _spotify_wait_for_callback(
     )
 
 
+def _xai_validate_loopback_redirect_uri(redirect_uri: str) -> tuple[str, int, str]:
+    parsed = urlparse(redirect_uri)
+    if parsed.scheme != "http":
+        raise AuthError(
+            "xAI OAuth redirect_uri must use http://127.0.0.1.",
+            provider="xai-oauth",
+            code="xai_redirect_invalid",
+        )
+    host = parsed.hostname or ""
+    if host != XAI_OAUTH_REDIRECT_HOST:
+        raise AuthError(
+            "xAI OAuth redirect_uri must point to 127.0.0.1.",
+            provider="xai-oauth",
+            code="xai_redirect_invalid",
+        )
+    if not parsed.port:
+        raise AuthError(
+            "xAI OAuth redirect_uri must include an explicit localhost port.",
+            provider="xai-oauth",
+            code="xai_redirect_invalid",
+        )
+    return host, parsed.port, parsed.path or "/"
+
+
+def _xai_callback_cors_origin(origin: Optional[str]) -> str:
+    allowed = {
+        "https://accounts.x.ai",
+        "https://auth.x.ai",
+        "https://accounts.mouseion.dev",
+        "http://localhost:20000",
+        "http://127.0.0.1:20000",
+    }
+    return origin if origin in allowed else ""
+
+
+def _make_xai_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequestHandler], dict[str, Any]]:
+    result: dict[str, Any] = {
+        "code": None,
+        "state": None,
+        "error": None,
+        "error_description": None,
+    }
+
+    class _XAICallbackHandler(BaseHTTPRequestHandler):
+        def _maybe_write_cors_headers(self) -> None:
+            origin = self.headers.get("Origin")
+            allow_origin = _xai_callback_cors_origin(origin)
+            if allow_origin:
+                self.send_header("Access-Control-Allow-Origin", allow_origin)
+                self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS")
+                self.send_header("Access-Control-Allow-Headers", "Content-Type")
+                self.send_header("Access-Control-Allow-Private-Network", "true")
+                self.send_header("Vary", "Origin")
+
+        def do_OPTIONS(self) -> None:  # noqa: N802
+            self.send_response(204)
+            self._maybe_write_cors_headers()
+            self.end_headers()
+
+        def do_GET(self) -> None:  # noqa: N802
+            parsed = urlparse(self.path)
+            if parsed.path != expected_path:
+                self.send_response(404)
+                self.end_headers()
+                self.wfile.write(b"Not found.")
+                return
+
+            params = parse_qs(parsed.query)
+            result["code"] = params.get("code", [None])[0]
+            result["state"] = params.get("state", [None])[0]
+            result["error"] = params.get("error", [None])[0]
+            result["error_description"] = params.get("error_description", [None])[0]
+
+            self.send_response(200)
+            self._maybe_write_cors_headers()
+            self.send_header("Content-Type", "text/html; charset=utf-8")
+            self.end_headers()
+            if result["error"]:
+                body = "<html><body><h1>xAI authorization failed.</h1>You can close this tab.</body></html>"
+            else:
+                body = "<html><body><h1>xAI authorization received.</h1>You can close this tab.</body></html>"
+            self.wfile.write(body.encode("utf-8"))
+
+        def log_message(self, format: str, *args: Any) -> None:  # noqa: A003
+            return
+
+    return _XAICallbackHandler, result
+
+
+def _xai_start_callback_server(
+    preferred_port: int = XAI_OAUTH_REDIRECT_PORT,
+) -> tuple[HTTPServer, threading.Thread, dict[str, Any], str]:
+    host = XAI_OAUTH_REDIRECT_HOST
+    expected_path = XAI_OAUTH_REDIRECT_PATH
+    handler_cls, result = _make_xai_callback_handler(expected_path)
+
+    class _ReuseHTTPServer(HTTPServer):
+        allow_reuse_address = True
+
+    ports_to_try = [preferred_port]
+    if preferred_port != 0:
+        ports_to_try.append(0)
+    server = None
+    last_error: Optional[OSError] = None
+    for port in ports_to_try:
+        try:
+            server = _ReuseHTTPServer((host, port), handler_cls)
+            break
+        except OSError as exc:
+            last_error = exc
+    if server is None:
+        raise AuthError(
+            f"Could not bind xAI callback server on {host}:{preferred_port}: {last_error}",
+            provider="xai-oauth",
+            code="xai_callback_bind_failed",
+        ) from last_error
+
+    actual_port = int(server.server_address[1])
+    redirect_uri = f"http://{host}:{actual_port}{expected_path}"
+    thread = threading.Thread(
+        target=server.serve_forever,
+        kwargs={"poll_interval": 0.1},
+        daemon=True,
+    )
+    thread.start()
+    return server, thread, result, redirect_uri
+
+
+def _xai_wait_for_callback(
+    server: HTTPServer,
+    thread: threading.Thread,
+    result: dict[str, Any],
+    *,
+    timeout_seconds: float = 180.0,
+) -> dict[str, Any]:
+    deadline = time.monotonic() + max(5.0, timeout_seconds)
+    try:
+        while time.monotonic() < deadline:
+            if result["code"] or result["error"]:
+                return result
+            time.sleep(0.1)
+    finally:
+        server.shutdown()
+        server.server_close()
+        thread.join(timeout=1.0)
+    raise AuthError(
+        "xAI authorization timed out waiting for the local callback.",
+        provider="xai-oauth",
+        code="xai_callback_timeout",
+    )
+
+
 def _spotify_token_payload_to_state(
     token_payload: Dict[str, Any],
     *,
@@ -2680,6 +2859,348 @@ def resolve_codex_runtime_credentials(
     }
 
 
+# =============================================================================
+# xAI Grok OAuth — tokens stored in ~/.hermes/auth.json
+# =============================================================================
+
+def _read_xai_oauth_tokens(*, _lock: bool = True) -> Dict[str, Any]:
+    if _lock:
+        with _auth_store_lock():
+            auth_store = _load_auth_store()
+    else:
+        auth_store = _load_auth_store()
+    state = _load_provider_state(auth_store, "xai-oauth")
+    if not state:
+        raise AuthError(
+            "No xAI OAuth credentials stored. Select xAI Grok OAuth (SuperGrok Subscription) in `hermes model`.",
+            provider="xai-oauth",
+            code="xai_auth_missing",
+            relogin_required=True,
+        )
+    tokens = state.get("tokens")
+    if not isinstance(tokens, dict):
+        raise AuthError(
+            "xAI OAuth state is missing tokens. Re-authenticate with `hermes model`.",
+            provider="xai-oauth",
+            code="xai_auth_invalid_shape",
+            relogin_required=True,
+        )
+    access_token = str(tokens.get("access_token", "") or "").strip()
+    refresh_token = str(tokens.get("refresh_token", "") or "").strip()
+    if not access_token:
+        raise AuthError(
+            "xAI OAuth state is missing access_token. Re-authenticate with `hermes model`.",
+            provider="xai-oauth",
+            code="xai_auth_missing_access_token",
+            relogin_required=True,
+        )
+    if not refresh_token:
+        raise AuthError(
+            "xAI OAuth state is missing refresh_token. Re-authenticate with `hermes model`.",
+            provider="xai-oauth",
+            code="xai_auth_missing_refresh_token",
+            relogin_required=True,
+        )
+    return {
+        "tokens": tokens,
+        "last_refresh": state.get("last_refresh"),
+        "discovery": state.get("discovery") or {},
+        "redirect_uri": state.get("redirect_uri"),
+    }
+
+
+def _save_xai_oauth_tokens(
+    tokens: Dict[str, Any],
+    *,
+    discovery: Optional[Dict[str, Any]] = None,
+    redirect_uri: str = "",
+    last_refresh: Optional[str] = None,
+) -> None:
+    if last_refresh is None:
+        last_refresh = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        state = _load_provider_state(auth_store, "xai-oauth") or {}
+        state["tokens"] = tokens
+        state["last_refresh"] = last_refresh
+        state["auth_mode"] = "oauth_pkce"
+        if discovery:
+            state["discovery"] = discovery
+        if redirect_uri:
+            state["redirect_uri"] = redirect_uri
+        _save_provider_state(auth_store, "xai-oauth", state)
+        _save_auth_store(auth_store)
+
+
+def _xai_access_token_is_expiring(access_token: str, skew_seconds: int = 0) -> bool:
+    if not isinstance(access_token, str) or "." not in access_token:
+        return False
+    try:
+        parts = access_token.split(".")
+        if len(parts) < 2:
+            return False
+        payload_b64 = parts[1]
+        payload_b64 += "=" * (-len(payload_b64) % 4)
+        payload = json.loads(base64.urlsafe_b64decode(payload_b64.encode("ascii")).decode("utf-8"))
+        exp = payload.get("exp")
+        if not isinstance(exp, (int, float)):
+            return False
+        return float(exp) <= (time.time() + max(0, int(skew_seconds)))
+    except Exception:
+        return False
+
+
+def _xai_validate_oauth_endpoint(url: str, *, field: str) -> str:
+    """Refuse any OIDC discovery endpoint that isn't HTTPS on the xAI origin.
+
+    The OIDC discovery response is a long-lived, low-frequency request whose
+    output is cached in ``~/.hermes/auth.json``. A single MITM during initial
+    login could substitute a malicious ``token_endpoint``; that URL would
+    then receive the refresh_token on every subsequent refresh — a permanent
+    credential leak from a one-time MITM. Validating scheme + host pins the
+    cached endpoint to the xAI auth origin (or a future ``*.x.ai`` subdomain
+    if xAI migrates) so the cache poisoning loses its persistence guarantee.
+
+    RFC 8414 §2 requires the issuer to be ``https://`` and SHOULD-keeps the
+    token_endpoint on the same origin; we enforce both. ``x.ai`` is the
+    bare apex, so we accept either exact host match or any ``.x.ai`` suffix.
+    """
+    parsed = urlparse(url)
+    if parsed.scheme != "https":
+        raise AuthError(
+            f"xAI OIDC discovery returned a non-HTTPS {field}: {url!r}.",
+            provider="xai-oauth",
+            code="xai_discovery_invalid",
+        )
+    host = (parsed.hostname or "").lower()
+    if not host:
+        raise AuthError(
+            f"xAI OIDC discovery {field} is missing a hostname: {url!r}.",
+            provider="xai-oauth",
+            code="xai_discovery_invalid",
+        )
+    if host != "x.ai" and not host.endswith(".x.ai"):
+        raise AuthError(
+            f"xAI OIDC discovery {field} host {host!r} is not on the xAI origin "
+            f"(expected x.ai or a *.x.ai subdomain). Refusing to use a cached "
+            f"endpoint that may have been substituted by a MITM during initial "
+            f"discovery; re-authenticate with `hermes model` to re-fetch.",
+            provider="xai-oauth",
+            code="xai_discovery_invalid",
+        )
+    return url
+
+
+def _xai_oauth_discovery(timeout_seconds: float = 15.0) -> Dict[str, str]:
+    try:
+        response = httpx.get(
+            XAI_OAUTH_DISCOVERY_URL,
+            headers={"Accept": "application/json"},
+            timeout=timeout_seconds,
+        )
+    except Exception as exc:
+        raise AuthError(
+            f"xAI OIDC discovery failed: {exc}",
+            provider="xai-oauth",
+            code="xai_discovery_failed",
+        ) from exc
+    if response.status_code != 200:
+        raise AuthError(
+            f"xAI OIDC discovery returned status {response.status_code}.",
+            provider="xai-oauth",
+            code="xai_discovery_failed",
+        )
+    try:
+        payload = response.json()
+    except Exception as exc:
+        raise AuthError(
+            f"xAI OIDC discovery returned invalid JSON: {exc}",
+            provider="xai-oauth",
+            code="xai_discovery_invalid_json",
+        ) from exc
+    if not isinstance(payload, dict):
+        raise AuthError(
+            "xAI OIDC discovery response was not a JSON object.",
+            provider="xai-oauth",
+            code="xai_discovery_incomplete",
+        )
+    authorization_endpoint = str(payload.get("authorization_endpoint", "") or "").strip()
+    token_endpoint = str(payload.get("token_endpoint", "") or "").strip()
+    if not authorization_endpoint or not token_endpoint:
+        raise AuthError(
+            "xAI OIDC discovery response was missing required endpoints.",
+            provider="xai-oauth",
+            code="xai_discovery_incomplete",
+        )
+    _xai_validate_oauth_endpoint(authorization_endpoint, field="authorization_endpoint")
+    _xai_validate_oauth_endpoint(token_endpoint, field="token_endpoint")
+    return {
+        "authorization_endpoint": authorization_endpoint,
+        "token_endpoint": token_endpoint,
+    }
+
+
+def refresh_xai_oauth_pure(
+    access_token: str,
+    refresh_token: str,
+    *,
+    token_endpoint: str = "",
+    timeout_seconds: float = 20.0,
+) -> Dict[str, Any]:
+    del access_token
+    if not isinstance(refresh_token, str) or not refresh_token.strip():
+        raise AuthError(
+            "xAI OAuth is missing refresh_token. Re-authenticate with `hermes model`.",
+            provider="xai-oauth",
+            code="xai_auth_missing_refresh_token",
+            relogin_required=True,
+        )
+    endpoint = token_endpoint.strip() or _xai_oauth_discovery(timeout_seconds)["token_endpoint"]
+    # Re-validate cached endpoints on the refresh hot path: an auth.json
+    # written by an older Hermes (or hand-edited) may carry a non-xAI
+    # token_endpoint that would receive every future refresh_token in
+    # plaintext if we trusted it blindly. Cheap suffix check; fast-fail
+    # with a clear error so the user can re-run `hermes model` to refetch.
+    _xai_validate_oauth_endpoint(endpoint, field="token_endpoint")
+    timeout = httpx.Timeout(max(5.0, float(timeout_seconds)))
+    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client:
+        response = client.post(
+            endpoint,
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "grant_type": "refresh_token",
+                "client_id": XAI_OAUTH_CLIENT_ID,
+                "refresh_token": refresh_token,
+            },
+        )
+    if response.status_code != 200:
+        detail = response.text.strip()
+        raise AuthError(
+            "xAI token refresh failed."
+            + (f" Response: {detail}" if detail else ""),
+            provider="xai-oauth",
+            code="xai_refresh_failed",
+            relogin_required=(response.status_code in {400, 401, 403}),
+        )
+    try:
+        payload = response.json()
+    except Exception as exc:
+        raise AuthError(
+            f"xAI token refresh returned invalid JSON: {exc}",
+            provider="xai-oauth",
+            code="xai_refresh_invalid_json",
+        ) from exc
+    if not isinstance(payload, dict):
+        raise AuthError(
+            "xAI token refresh response was not a JSON object.",
+            provider="xai-oauth",
+            code="xai_refresh_invalid_response",
+            relogin_required=True,
+        )
+    refreshed_access = str(payload.get("access_token", "") or "").strip()
+    if not refreshed_access:
+        raise AuthError(
+            "xAI token refresh response was missing access_token.",
+            provider="xai-oauth",
+            code="xai_refresh_missing_access_token",
+            relogin_required=True,
+        )
+    updated = {
+        "access_token": refreshed_access,
+        "refresh_token": str(payload.get("refresh_token") or refresh_token).strip(),
+        "id_token": str(payload.get("id_token") or "").strip(),
+        "expires_in": payload.get("expires_in"),
+        "token_type": str(payload.get("token_type") or "Bearer").strip() or "Bearer",
+        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+    }
+    return updated
+
+
+def _refresh_xai_oauth_tokens(
+    tokens: Dict[str, Any],
+    *,
+    token_endpoint: str,
+    redirect_uri: str = "",
+    timeout_seconds: float,
+) -> Dict[str, Any]:
+    refreshed = refresh_xai_oauth_pure(
+        str(tokens.get("access_token", "") or ""),
+        str(tokens.get("refresh_token", "") or ""),
+        token_endpoint=token_endpoint,
+        timeout_seconds=timeout_seconds,
+    )
+    updated_tokens = dict(tokens)
+    updated_tokens["access_token"] = refreshed["access_token"]
+    updated_tokens["refresh_token"] = refreshed["refresh_token"]
+    if refreshed.get("id_token"):
+        updated_tokens["id_token"] = refreshed["id_token"]
+    if refreshed.get("expires_in") is not None:
+        updated_tokens["expires_in"] = refreshed["expires_in"]
+    if refreshed.get("token_type"):
+        updated_tokens["token_type"] = refreshed["token_type"]
+    _save_xai_oauth_tokens(
+        updated_tokens,
+        discovery={"token_endpoint": token_endpoint},
+        redirect_uri=redirect_uri,
+        last_refresh=refreshed["last_refresh"],
+    )
+    return updated_tokens
+
+
+def resolve_xai_oauth_runtime_credentials(
+    *,
+    force_refresh: bool = False,
+    refresh_if_expiring: bool = True,
+    refresh_skew_seconds: int = XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+) -> Dict[str, Any]:
+    data = _read_xai_oauth_tokens()
+    tokens = dict(data["tokens"])
+    access_token = str(tokens.get("access_token", "") or "").strip()
+    refresh_timeout_seconds = float(os.getenv("HERMES_XAI_REFRESH_TIMEOUT_SECONDS", "20"))
+    discovery = dict(data.get("discovery") or {})
+    token_endpoint = str(discovery.get("token_endpoint", "") or "").strip()
+    redirect_uri = str(data.get("redirect_uri", "") or "").strip()
+
+    should_refresh = bool(force_refresh)
+    if (not should_refresh) and refresh_if_expiring:
+        should_refresh = _xai_access_token_is_expiring(access_token, refresh_skew_seconds)
+    if should_refresh:
+        with _auth_store_lock(timeout_seconds=max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0)):
+            data = _read_xai_oauth_tokens(_lock=False)
+            tokens = dict(data["tokens"])
+            access_token = str(tokens.get("access_token", "") or "").strip()
+            discovery = dict(data.get("discovery") or {})
+            token_endpoint = str(discovery.get("token_endpoint", "") or "").strip()
+            redirect_uri = str(data.get("redirect_uri", "") or "").strip()
+            should_refresh = bool(force_refresh)
+            if (not should_refresh) and refresh_if_expiring:
+                should_refresh = _xai_access_token_is_expiring(access_token, refresh_skew_seconds)
+            if should_refresh:
+                if not token_endpoint:
+                    token_endpoint = _xai_oauth_discovery(refresh_timeout_seconds)["token_endpoint"]
+                tokens = _refresh_xai_oauth_tokens(
+                    tokens,
+                    token_endpoint=token_endpoint,
+                    redirect_uri=redirect_uri,
+                    timeout_seconds=refresh_timeout_seconds,
+                )
+                access_token = str(tokens.get("access_token", "") or "").strip()
+
+    base_url = (
+        os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
+        or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
+        or DEFAULT_XAI_OAUTH_BASE_URL
+    )
+    return {
+        "provider": "xai-oauth",
+        "base_url": base_url,
+        "api_key": access_token,
+        "source": "hermes-auth-store",
+        "last_refresh": data.get("last_refresh"),
+        "auth_mode": "oauth_pkce",
+    }
+
+
 # =============================================================================
 # TLS verification helper
 # =============================================================================
@@ -4030,6 +4551,48 @@ def get_codex_auth_status() -> Dict[str, Any]:
         }
 
 
+def get_xai_oauth_auth_status() -> Dict[str, Any]:
+    try:
+        from agent.credential_pool import load_pool
+
+        pool = load_pool("xai-oauth")
+        if pool and pool.has_credentials():
+            entry = pool.select()
+            if entry is not None:
+                api_key = (
+                    getattr(entry, "runtime_api_key", None)
+                    or getattr(entry, "access_token", "")
+                )
+                if api_key and not _xai_access_token_is_expiring(api_key, 0):
+                    return {
+                        "logged_in": True,
+                        "auth_store": str(_auth_file_path()),
+                        "last_refresh": getattr(entry, "last_refresh", None),
+                        "auth_mode": "oauth_pkce",
+                        "source": f"pool:{getattr(entry, 'label', 'unknown')}",
+                        "api_key": api_key,
+                    }
+    except Exception:
+        pass
+
+    try:
+        creds = resolve_xai_oauth_runtime_credentials()
+        return {
+            "logged_in": True,
+            "auth_store": str(_auth_file_path()),
+            "last_refresh": creds.get("last_refresh"),
+            "auth_mode": creds.get("auth_mode"),
+            "source": creds.get("source"),
+            "api_key": creds.get("api_key"),
+        }
+    except AuthError as exc:
+        return {
+            "logged_in": False,
+            "auth_store": str(_auth_file_path()),
+            "error": str(exc),
+        }
+
+
 def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
     """Status snapshot for API-key providers (z.ai, Kimi, MiniMax)."""
     pconfig = PROVIDER_REGISTRY.get(provider_id)
@@ -4100,6 +4663,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
         return get_nous_auth_status()
     if target == "openai-codex":
         return get_codex_auth_status()
+    if target == "xai-oauth":
+        return get_xai_oauth_auth_status()
     if target == "qwen-oauth":
         return get_qwen_auth_status()
     if target == "google-gemini-cli":
@@ -4320,7 +4885,7 @@ def _logout_default_provider_from_config() -> Optional[str]:
     "No provider is currently logged in" and never reset model.provider.
     """
     provider = _get_config_provider()
-    if provider in {"nous", "openai-codex"}:
+    if provider in {"nous", "openai-codex", "xai-oauth"}:
         return provider
     return None
 
@@ -4619,6 +5184,245 @@ def _login_openai_codex(
     print(f"  Config updated: {config_path} (model.provider=openai-codex)")
 
 
+def _login_xai_oauth(
+    args,
+    pconfig: ProviderConfig,
+    *,
+    force_new_login: bool = False,
+) -> None:
+    del pconfig
+
+    if not force_new_login:
+        try:
+            existing = resolve_xai_oauth_runtime_credentials()
+            api_key = existing.get("api_key", "")
+            if isinstance(api_key, str) and api_key and not _xai_access_token_is_expiring(api_key, 60):
+                print("Existing xAI OAuth credentials found in Hermes auth store.")
+                try:
+                    reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
+                except (EOFError, KeyboardInterrupt):
+                    reuse = "y"
+                if reuse in ("", "y", "yes"):
+                    config_path = _update_config_for_provider(
+                        "xai-oauth",
+                        existing.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL),
+                    )
+                    print()
+                    print("Login successful!")
+                    print(f"  Config updated: {config_path} (model.provider=xai-oauth)")
+                    return
+        except AuthError:
+            pass
+
+    print()
+    print("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")
+    print("(Hermes creates its own local OAuth session)")
+    print()
+
+    timeout_seconds = float(getattr(args, "timeout", None) or 20.0)
+    open_browser = not getattr(args, "no_browser", False)
+    if _is_remote_session():
+        open_browser = False
+
+    creds = _xai_oauth_loopback_login(timeout_seconds=timeout_seconds, open_browser=open_browser)
+    _save_xai_oauth_tokens(
+        creds["tokens"],
+        discovery=creds.get("discovery"),
+        redirect_uri=creds.get("redirect_uri", ""),
+        last_refresh=creds.get("last_refresh"),
+    )
+    config_path = _update_config_for_provider("xai-oauth", creds.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL))
+    print()
+    print("Login successful!")
+    from hermes_constants import display_hermes_home as _dhh
+    print(f"  Auth state: {_dhh()}/auth.json")
+    print(f"  Config updated: {config_path} (model.provider=xai-oauth)")
+
+
+def _xai_oauth_build_authorize_url(
+    *,
+    authorization_endpoint: str,
+    redirect_uri: str,
+    code_challenge: str,
+    state: str,
+    nonce: str,
+) -> str:
+    # `plan=generic` opts the consent screen into xAI's generic OAuth plan
+    # tier instead of falling back to the per-account default. Without it,
+    # accounts.x.ai rejects loopback OAuth from non-allowlisted clients.
+    # `referrer=hermes-agent` lets xAI attribute Hermes-originated logins
+    # in their OAuth server logs (we still impersonate the upstream Grok-CLI
+    # client_id; this is best-effort attribution until xAI mints us our own).
+    authorize_params = {
+        "response_type": "code",
+        "client_id": XAI_OAUTH_CLIENT_ID,
+        "redirect_uri": redirect_uri,
+        "scope": XAI_OAUTH_SCOPE,
+        "code_challenge": code_challenge,
+        "code_challenge_method": "S256",
+        "state": state,
+        "nonce": nonce,
+        "plan": "generic",
+        "referrer": "hermes-agent",
+    }
+    return f"{authorization_endpoint}?{urlencode(authorize_params)}"
+
+
+def _xai_oauth_loopback_login(
+    *,
+    timeout_seconds: float = 20.0,
+    open_browser: bool = True,
+) -> Dict[str, Any]:
+    discovery = _xai_oauth_discovery(timeout_seconds)
+    authorization_endpoint = discovery["authorization_endpoint"]
+    token_endpoint = discovery["token_endpoint"]
+
+    server, thread, callback_result, redirect_uri = _xai_start_callback_server()
+    try:
+        _xai_validate_loopback_redirect_uri(redirect_uri)
+        code_verifier = _oauth_pkce_code_verifier()
+        code_challenge = _oauth_pkce_code_challenge(code_verifier)
+        state = uuid.uuid4().hex
+        nonce = uuid.uuid4().hex
+        authorize_url = _xai_oauth_build_authorize_url(
+            authorization_endpoint=authorization_endpoint,
+            redirect_uri=redirect_uri,
+            code_challenge=code_challenge,
+            state=state,
+            nonce=nonce,
+        )
+
+        print("Open this URL to authorize Hermes with xAI:")
+        print(authorize_url)
+        print()
+        print(f"Waiting for callback on {redirect_uri}")
+
+        if open_browser and not _is_remote_session():
+            try:
+                opened = webbrowser.open(authorize_url)
+            except Exception:
+                opened = False
+            if opened:
+                print("Browser opened for xAI authorization.")
+            else:
+                print("Could not open the browser automatically; use the URL above.")
+
+        callback = _xai_wait_for_callback(
+            server,
+            thread,
+            callback_result,
+            timeout_seconds=max(30.0, timeout_seconds * 9),
+        )
+    except Exception:
+        try:
+            server.shutdown()
+            server.server_close()
+        except Exception:
+            pass
+        try:
+            thread.join(timeout=1.0)
+        except Exception:
+            pass
+        raise
+
+    if callback.get("error"):
+        detail = callback.get("error_description") or callback["error"]
+        raise AuthError(
+            f"xAI authorization failed: {detail}",
+            provider="xai-oauth",
+            code="xai_authorization_failed",
+        )
+    if callback.get("state") != state:
+        raise AuthError(
+            "xAI authorization failed: state mismatch.",
+            provider="xai-oauth",
+            code="xai_state_mismatch",
+        )
+    code = str(callback.get("code") or "").strip()
+    if not code:
+        raise AuthError(
+            "xAI authorization failed: missing authorization code.",
+            provider="xai-oauth",
+            code="xai_code_missing",
+        )
+
+    try:
+        response = httpx.post(
+            token_endpoint,
+            headers={"Content-Type": "application/x-www-form-urlencoded", "Accept": "application/json"},
+            data={
+                "grant_type": "authorization_code",
+                "code": code,
+                "redirect_uri": redirect_uri,
+                "client_id": XAI_OAUTH_CLIENT_ID,
+                "code_verifier": code_verifier,
+            },
+            timeout=max(20.0, timeout_seconds),
+        )
+    except Exception as exc:
+        raise AuthError(
+            f"xAI token exchange failed: {exc}",
+            provider="xai-oauth",
+            code="xai_token_exchange_failed",
+        ) from exc
+    if response.status_code != 200:
+        detail = response.text.strip()
+        raise AuthError(
+            "xAI token exchange failed."
+            + (f" Response: {detail}" if detail else ""),
+            provider="xai-oauth",
+            code="xai_token_exchange_failed",
+        )
+    try:
+        payload = response.json()
+    except Exception as exc:
+        raise AuthError(
+            f"xAI token exchange returned invalid JSON: {exc}",
+            provider="xai-oauth",
+            code="xai_token_exchange_invalid",
+        ) from exc
+    if not isinstance(payload, dict):
+        raise AuthError(
+            "xAI token exchange response was not a JSON object.",
+            provider="xai-oauth",
+            code="xai_token_exchange_invalid",
+        )
+    access_token = str(payload.get("access_token", "") or "").strip()
+    refresh_token = str(payload.get("refresh_token", "") or "").strip()
+    if not access_token:
+        raise AuthError(
+            "xAI token exchange did not return an access_token.",
+            provider="xai-oauth",
+            code="xai_token_exchange_invalid",
+        )
+    if not refresh_token:
+        raise AuthError(
+            "xAI token exchange did not return a refresh_token.",
+            provider="xai-oauth",
+            code="xai_token_exchange_invalid",
+        )
+
+    base_url = (
+        os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
+        or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
+        or DEFAULT_XAI_OAUTH_BASE_URL
+    )
+    return {
+        "tokens": {
+            "access_token": access_token,
+            "refresh_token": refresh_token,
+            "id_token": str(payload.get("id_token", "") or "").strip(),
+            "expires_in": payload.get("expires_in"),
+            "token_type": str(payload.get("token_type") or "Bearer").strip() or "Bearer",
+        },
+        "discovery": discovery,
+        "redirect_uri": redirect_uri,
+        "base_url": base_url,
+        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+        "source": "oauth-loopback",
+    }
+
+
 def _codex_device_code_login() -> Dict[str, Any]:
     """Run the OpenAI device code login flow and return credentials dict."""
     import time as _time
diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py
index 65cb7ed1b85..10b040d8a1d 100644
--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL
 
 
 # Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "xai-oauth", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}
 
 
 def _get_custom_provider_names() -> list:
@@ -77,6 +77,8 @@ def _normalize_provider(provider: str) -> str:
     normalized = (provider or "").strip().lower()
     if normalized in {"or", "open-router"}:
         return "openrouter"
+    if normalized in {"grok-oauth", "xai-oauth", "x-ai-oauth", "xai-grok-oauth"}:
+        return "xai-oauth"
     # Check if it matches a custom provider name
     custom_key = _resolve_custom_provider_input(normalized)
     if custom_key:
@@ -170,7 +172,7 @@ def auth_add_command(args) -> None:
         if provider.startswith(CUSTOM_POOL_PREFIX):
             requested_type = AUTH_TYPE_API_KEY
         else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY
+            requested_type = AUTH_TYPE_OAUTH if provider in _OAUTH_CAPABLE_PROVIDERS else AUTH_TYPE_API_KEY
 
     pool = load_pool(provider)
 
@@ -333,6 +335,31 @@ def auth_add_command(args) -> None:
         print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
         return
 
+    if provider == "xai-oauth":
+        creds = auth_mod._xai_oauth_loopback_login(
+            timeout_seconds=getattr(args, "timeout", None) or 20.0,
+            open_browser=not getattr(args, "no_browser", False),
+        )
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["tokens"]["access_token"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:xai_pkce",
+            access_token=creds["tokens"]["access_token"],
+            refresh_token=creds["tokens"].get("refresh_token"),
+            base_url=creds.get("base_url"),
+            last_refresh=creds.get("last_refresh"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
     if provider == "google-gemini-cli":
         from agent.google_oauth import run_gemini_oauth_login_pure
 
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 833172a23b9..c7ac1100816 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1932,6 +1932,8 @@ def select_provider_and_model(args=None):
         _model_flow_nous(config, current_model, args=args)
     elif selected_provider == "openai-codex":
         _model_flow_openai_codex(config, current_model)
+    elif selected_provider == "xai-oauth":
+        _model_flow_xai_oauth(config, current_model)
     elif selected_provider == "qwen-oauth":
         _model_flow_qwen_oauth(config, current_model)
     elif selected_provider == "minimax-oauth":
@@ -2813,6 +2815,87 @@ def _model_flow_openai_codex(config, current_model=""):
         print("No change.")
 
 
+def _model_flow_xai_oauth(_config, current_model=""):
+    """xAI Grok OAuth (SuperGrok Subscription) provider: ensure logged in, then pick model."""
+    from hermes_cli.auth import (
+        get_xai_oauth_auth_status,
+        _prompt_model_selection,
+        _save_model_choice,
+        _update_config_for_provider,
+        resolve_xai_oauth_runtime_credentials,
+        _login_xai_oauth,
+        DEFAULT_XAI_OAUTH_BASE_URL,
+        PROVIDER_REGISTRY,
+    )
+    from hermes_cli.models import _PROVIDER_MODELS
+
+    status = get_xai_oauth_auth_status()
+    if status.get("logged_in"):
+        print("  xAI Grok OAuth (SuperGrok Subscription) credentials: ✓")
+        print()
+        print("    1. Use existing credentials")
+        print("    2. Reauthenticate (new OAuth login)")
+        print("    3. Cancel")
+        print()
+        try:
+            choice = input("  Choice [1/2/3]: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            choice = "1"
+
+        if choice == "2":
+            print("Starting a fresh xAI OAuth login...")
+            print()
+            try:
+                mock_args = argparse.Namespace()
+                _login_xai_oauth(
+                    mock_args,
+                    PROVIDER_REGISTRY["xai-oauth"],
+                    force_new_login=True,
+                )
+            except SystemExit:
+                print("Login cancelled or failed.")
+                return
+            except Exception as exc:
+                print(f"Login failed: {exc}")
+                return
+        elif choice == "3":
+            return
+    else:
+        print("Not logged into xAI Grok OAuth (SuperGrok Subscription). Starting login...")
+        print()
+        try:
+            mock_args = argparse.Namespace()
+            _login_xai_oauth(mock_args, PROVIDER_REGISTRY["xai-oauth"])
+        except SystemExit:
+            print("Login cancelled or failed.")
+            return
+        except Exception as exc:
+            print(f"Login failed: {exc}")
+            return
+
+    # Resolve a usable base URL.  ``resolve_xai_oauth_runtime_credentials``
+    # only reads from the auth.json singleton — but credentials may legitimately
+    # live only in the pool (e.g. after ``hermes auth add xai-oauth``).  Fall
+    # back to the default base URL in that case so the model picker still
+    # completes successfully instead of bailing out with
+    # ``Could not resolve xAI OAuth credentials``.
+    base_url = DEFAULT_XAI_OAUTH_BASE_URL
+    try:
+        creds = resolve_xai_oauth_runtime_credentials()
+        base_url = (creds.get("base_url") or "").strip().rstrip("/") or base_url
+    except Exception:
+        pass
+
+    models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or [])
+    selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-code-fast-1"))
+    if selected:
+        _save_model_choice(selected)
+        _update_config_for_provider("xai-oauth", base_url)
+        print(f"Default model set to: {selected} (via xAI Grok OAuth — SuperGrok Subscription)")
+    else:
+        print("No change.")
+
+
 _DEFAULT_QWEN_PORTAL_MODELS = [
     "qwen3-coder-plus",
     "qwen3-coder",
@@ -9400,7 +9483,7 @@ def _build_provider_choices() -> list[str]:
     except Exception:
         # Fallback: static list guarantees the CLI always works
         return [
-            "auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
+            "auto", "openrouter", "nous", "openai-codex", "xai-oauth", "copilot-acp", "copilot",
             "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
             "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
             "stepfun", "minimax", "minimax-cn", "kilocode", "novita", "xiaomi", "arcee",
@@ -9931,7 +10014,7 @@ def main():
     )
     login_parser.add_argument(
         "--provider",
-        choices=["nous", "openai-codex"],
+        choices=["nous", "openai-codex", "xai-oauth"],
         default=None,
         help="Provider to authenticate with (default: nous)",
     )
@@ -9977,7 +10060,7 @@ def main():
     )
     logout_parser.add_argument(
         "--provider",
-        choices=["nous", "openai-codex", "spotify"],
+        choices=["nous", "openai-codex", "xai-oauth", "spotify"],
         default=None,
         help="Provider to log out from (default: active provider)",
     )
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index bc41132f5d5..ded3f448f87 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -116,13 +116,23 @@ def _codex_curated_models() -> list[str]:
 # (grok-4, grok-4-0709, grok-4-fast{,-reasoning,-non-reasoning},
 #  grok-4-1-fast{,-reasoning,-non-reasoning}, grok-code-fast-1 → grok-4.3).
 _XAI_STATIC_FALLBACK: list[str] = [
+    "grok-4.3",
     "grok-4.20-0309-reasoning",
     "grok-4.20-0309-non-reasoning",
     "grok-4.20-multi-agent-0309",
-    "grok-4.3",
 ]
 
 
+_XAI_TOP_MODEL = "grok-4.3"
+
+
+def _xai_promote_top(ids: list[str]) -> list[str]:
+    """Pin the headline xAI model to the top of the curated list."""
+    if _XAI_TOP_MODEL in ids:
+        return [_XAI_TOP_MODEL] + [m for m in ids if m != _XAI_TOP_MODEL]
+    return ids
+
+
 def _xai_curated_models() -> list[str]:
     """Derive the xAI-direct curated list from models.dev disk cache.
 
@@ -142,7 +152,7 @@ def _xai_curated_models() -> list[str]:
         if isinstance(models, dict) and models:
             ids = [mid for mid in models.keys() if isinstance(mid, str)]
             if ids:
-                return sorted(ids)
+                return _xai_promote_top(sorted(ids))
     except Exception:
         # Any failure (missing file, malformed JSON, import error)
         # falls through to the static list.
@@ -190,6 +200,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "gpt-4o-mini",
     ],
     "openai-codex": _codex_curated_models(),
+    "xai-oauth": _xai_curated_models(),
     "copilot-acp": [
         "copilot-acp",
     ],
@@ -918,6 +929,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
     ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
     ProviderEntry("alibaba",        "Qwen Cloud",               "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
+    ProviderEntry("xai-oauth",      "xAI Grok OAuth (SuperGrok Subscription)", "xAI Grok OAuth (SuperGrok Subscription)"),
     ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
     ProviderEntry("tencent-tokenhub", "Tencent TokenHub",       "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
     ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
@@ -1036,6 +1048,10 @@ _PROVIDER_ALIASES = {
     "amazon-bedrock": "bedrock",
     "amazon": "bedrock",
     "grok": "xai",
+    "grok-oauth": "xai-oauth",
+    "xai-oauth": "xai-oauth",
+    "x-ai-oauth": "xai-oauth",
+    "xai-grok-oauth": "xai-oauth",
     "x-ai": "xai",
     "x.ai": "xai",
     "nim": "nvidia",
@@ -2166,6 +2182,8 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
         except Exception:
             access_token = None
         return get_codex_model_ids(access_token=access_token)
+    if normalized == "xai-oauth":
+        return list(_PROVIDER_MODELS.get("xai-oauth", _PROVIDER_MODELS.get("xai", [])))
     if normalized in {"copilot", "copilot-acp"}:
         try:
             live = _fetch_github_models(_resolve_copilot_catalog_api_key())
@@ -3444,14 +3462,14 @@ def validate_requested_model(
             "message": message,
         }
 
-    # OpenAI Codex has its own catalog path; /v1/models probing is not the right validation path.
-    if normalized == "openai-codex":
+    # Providers with non-standard catalog validation — /v1/models probing is not the right path.
+    if normalized in {"openai-codex", "xai-oauth"}:
         try:
-            codex_models = provider_model_ids("openai-codex")
+            catalog_models = provider_model_ids(normalized)
         except Exception:
-            codex_models = []
-        if codex_models:
-            if requested_for_lookup in set(codex_models):
+            catalog_models = []
+        if catalog_models:
+            if requested_for_lookup in set(catalog_models):
                 return {
                     "accepted": True,
                     "persist": True,
@@ -3459,7 +3477,7 @@ def validate_requested_model(
                     "message": None,
                 }
             # Auto-correct if the top match is very similar (e.g. typo)
-            auto = get_close_matches(requested_for_lookup, codex_models, n=1, cutoff=0.9)
+            auto = get_close_matches(requested_for_lookup, catalog_models, n=1, cutoff=0.9)
             if auto:
                 return {
                     "accepted": True,
@@ -3468,17 +3486,18 @@ def validate_requested_model(
                     "corrected_model": auto[0],
                     "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
                 }
-            suggestions = get_close_matches(requested_for_lookup, codex_models, n=3, cutoff=0.5)
+            suggestions = get_close_matches(requested_for_lookup, catalog_models, n=3, cutoff=0.5)
             suggestion_text = ""
             if suggestions:
                 suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
+            provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok Subscription)"
             return {
                 "accepted": True,
                 "persist": True,
                 "recognized": False,
                 "message": (
-                    f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
-                    "It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID."
+                    f"Note: `{requested}` was not found in the {provider_label} model listing. "
+                    "It may still work if your account has access to a newer or hidden model ID."
                     f"{suggestion_text}"
                 ),
             }
diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py
index 08fc173dc69..9243b3f6f84 100644
--- a/hermes_cli/providers.py
+++ b/hermes_cli/providers.py
@@ -60,6 +60,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
         auth_type="oauth_external",
         base_url_override="https://chatgpt.com/backend-api/codex",
     ),
+    "xai-oauth": HermesOverlay(
+        transport="codex_responses",
+        auth_type="oauth_external",
+        base_url_override="https://api.x.ai/v1",
+        base_url_env_var="XAI_BASE_URL",
+    ),
     "qwen-oauth": HermesOverlay(
         transport="openai_chat",
         auth_type="oauth_external",
@@ -244,6 +250,10 @@ ALIASES: Dict[str, str] = {
     "x-ai": "xai",
     "x.ai": "xai",
     "grok": "xai",
+    "grok-oauth": "xai-oauth",
+    "xai-oauth": "xai-oauth",
+    "x-ai-oauth": "xai-oauth",
+    "xai-grok-oauth": "xai-oauth",
 
     # nvidia
     "nim": "nvidia",
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index d7c30fe5648..c0baf14db92 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -15,12 +15,14 @@ from hermes_cli.auth import (
     AuthError,
     DEFAULT_CODEX_BASE_URL,
     DEFAULT_QWEN_BASE_URL,
+    DEFAULT_XAI_OAUTH_BASE_URL,
     PROVIDER_REGISTRY,
     _agent_key_is_usable,
     format_auth_error,
     resolve_provider,
     resolve_nous_runtime_credentials,
     resolve_codex_runtime_credentials,
+    resolve_xai_oauth_runtime_credentials,
     resolve_qwen_runtime_credentials,
     resolve_gemini_oauth_runtime_credentials,
     resolve_api_key_provider_credentials,
@@ -238,6 +240,9 @@ def _resolve_runtime_from_pool_entry(
     if provider == "openai-codex":
         api_mode = "codex_responses"
         base_url = base_url or DEFAULT_CODEX_BASE_URL
+    elif provider == "xai-oauth":
+        api_mode = "codex_responses"
+        base_url = base_url or DEFAULT_XAI_OAUTH_BASE_URL
     elif provider == "qwen-oauth":
         api_mode = "chat_completions"
         base_url = base_url or DEFAULT_QWEN_BASE_URL
@@ -1132,6 +1137,24 @@ def resolve_runtime_provider(
             logger.info("Auto-detected Codex provider but credentials failed; "
                         "falling through to next provider.")
 
+    if provider == "xai-oauth":
+        try:
+            creds = resolve_xai_oauth_runtime_credentials()
+            return {
+                "provider": "xai-oauth",
+                "api_mode": "codex_responses",
+                "base_url": (creds.get("base_url") or "").rstrip("/") or DEFAULT_XAI_OAUTH_BASE_URL,
+                "api_key": creds.get("api_key", ""),
+                "source": creds.get("source", "hermes-auth-store"),
+                "last_refresh": creds.get("last_refresh"),
+                "requested_provider": requested_provider,
+            }
+        except AuthError:
+            if requested_provider != "auto":
+                raise
+            logger.info("Auto-detected xAI OAuth provider but credentials failed; "
+                        "falling through to next provider.")
+
     if provider == "qwen-oauth":
         try:
             creds = resolve_qwen_runtime_credentials()
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 5d635b2c464..50e198b9dc7 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -1091,6 +1091,58 @@ def _install_kittentts_deps() -> bool:
         return False
 
 
+def _xai_oauth_logged_in_for_setup() -> bool:
+    """True iff xAI Grok OAuth credentials are already stored locally.
+
+    Lets TTS / STT setup skip the API-key prompt for users who logged in
+    through ``hermes model`` -> xAI Grok OAuth (SuperGrok Subscription).
+    """
+    try:
+        from hermes_cli.auth import get_xai_oauth_auth_status
+
+        return bool(get_xai_oauth_auth_status().get("logged_in"))
+    except Exception:
+        return False
+
+
+def _run_xai_oauth_login_from_setup() -> bool:
+    """Run the xAI Grok OAuth loopback login from inside the setup wizard.
+
+    Returns True on success, False on any failure (the caller falls back
+    to whatever the user picked next, e.g. Edge TTS).
+    """
+    try:
+        from hermes_cli.auth import (
+            DEFAULT_XAI_OAUTH_BASE_URL,
+            _is_remote_session,
+            _save_xai_oauth_tokens,
+            _update_config_for_provider,
+            _xai_oauth_loopback_login,
+        )
+    except Exception as exc:
+        print_warning(f"xAI Grok OAuth helpers unavailable: {exc}")
+        return False
+
+    open_browser = not _is_remote_session()
+    print()
+    print_info("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")
+    try:
+        creds = _xai_oauth_loopback_login(open_browser=open_browser)
+        _save_xai_oauth_tokens(
+            creds["tokens"],
+            discovery=creds.get("discovery"),
+            redirect_uri=creds.get("redirect_uri", ""),
+            last_refresh=creds.get("last_refresh"),
+        )
+        _update_config_for_provider(
+            "xai-oauth", creds.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL)
+        )
+        return True
+    except Exception as exc:
+        print_warning(f"xAI Grok OAuth login failed: {exc}")
+        return False
+
+
 def _setup_tts_provider(config: dict):
     """Interactive TTS provider selection with install flow for NeuTTS."""
     tts_config = config.get("tts", {})
@@ -1125,7 +1177,7 @@ def _setup_tts_provider(config: dict):
             "Edge TTS (free, cloud-based, no setup needed)",
             "ElevenLabs (premium quality, needs API key)",
             "OpenAI TTS (good quality, needs API key)",
-            "xAI TTS (Grok voices, needs API key)",
+            "xAI TTS (Grok voices — OAuth login or API key)",
             "MiniMax TTS (high quality with voice cloning, needs API key)",
             "Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
             "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
@@ -1199,21 +1251,59 @@ def _setup_tts_provider(config: dict):
                 selected = "edge"
 
     elif selected == "xai":
-        existing = get_env_value("XAI_API_KEY")
-        if not existing:
+        # Resolution order: existing OAuth tokens (free for SuperGrok subscribers
+        # via the Hermes auth store) > existing XAI_API_KEY > prompt the user.
+        # When neither is configured, offer both options instead of forcing the
+        # API-key path — xAI TTS works fine with OAuth bearer tokens too.
+        oauth_logged_in = _xai_oauth_logged_in_for_setup()
+        existing_api_key = get_env_value("XAI_API_KEY")
+
+        if oauth_logged_in:
+            print_success(
+                "xAI TTS will use your xAI Grok OAuth (SuperGrok Subscription) "
+                "credentials"
+            )
+        elif existing_api_key:
+            print_success("xAI TTS will use your existing XAI_API_KEY")
+        else:
             print()
-            api_key = prompt("xAI API key for TTS", password=True)
-            if api_key:
-                save_env_value("XAI_API_KEY", api_key)
-                print_success("xAI TTS API key saved")
+            choice_idx = prompt_choice(
+                "How do you want xAI TTS to authenticate?",
+                choices=[
+                    "Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
+                    "Paste an xAI API key (console.x.ai)",
+                    "Skip → fallback to Edge TTS",
+                ],
+                default=0,
+            )
+            if choice_idx == 0:
+                if _run_xai_oauth_login_from_setup():
+                    print_success(
+                        "Logged in — xAI TTS will use these OAuth credentials"
+                    )
+                else:
+                    print_warning(
+                        "xAI Grok OAuth login did not complete. "
+                        "Falling back to Edge TTS."
+                    )
+                    selected = "edge"
+            elif choice_idx == 1:
+                api_key = prompt("xAI API key for TTS", password=True)
+                if api_key:
+                    save_env_value("XAI_API_KEY", api_key)
+                    print_success("xAI TTS API key saved")
+                else:
+                    from hermes_constants import display_hermes_home as _dhh
+                    print_warning(
+                        "No xAI API key provided for TTS. Configure XAI_API_KEY "
+                        f"via hermes setup model or {_dhh()}/.env to use xAI TTS. "
+                        "Falling back to Edge TTS."
+                    )
+                    selected = "edge"
             else:
-                from hermes_constants import display_hermes_home as _dhh
-                print_warning(
-                    "No xAI API key provided for TTS. Configure XAI_API_KEY via "
-                    f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
-                    "Falling back to Edge TTS."
-                )
+                print_warning("xAI TTS skipped. Falling back to Edge TTS.")
                 selected = "edge"
+
         if selected == "xai":
             print()
             voice_id = prompt("xAI voice_id (Enter for 'eve', or paste a custom voice ID)")
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index fc5b1acf5cf..891ffdeb05a 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -194,11 +194,10 @@ TOOL_CATEGORIES = {
             },
             {
                 "name": "xAI TTS",
-                "tag": "Grok voices - requires xAI API key",
-                "env_vars": [
-                    {"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"},
-                ],
+                "tag": "Grok voices — uses xAI Grok OAuth or XAI_API_KEY",
+                "env_vars": [],
                 "tts_provider": "xai",
+                "post_setup": "xai_grok",
             },
             {
                 "name": "ElevenLabs",
@@ -925,6 +924,73 @@ def _run_post_setup(post_setup_key: str):
         _print_info("    Restart Hermes for tracing to take effect.")
         _print_info("    Verify: hermes plugins list")
 
+    elif post_setup_key == "xai_grok":
+        # Shared credential bootstrap for any picker entry that talks to xAI
+        # (TTS, Video Gen, future Image Gen, etc.). Accepts either a
+        # SuperGrok-tier OAuth bearer token (preferred — billed against the
+        # user's existing subscription) or a raw XAI_API_KEY from
+        # console.x.ai. The picker entries declare empty env_vars so we
+        # drive the full auth UX here.
+        try:
+            from hermes_cli.auth import get_xai_oauth_auth_status
+            oauth_logged_in = bool(get_xai_oauth_auth_status().get("logged_in"))
+        except Exception:
+            oauth_logged_in = False
+        existing_api_key = get_env_value("XAI_API_KEY")
+
+        if oauth_logged_in:
+            _print_success(
+                "    xAI will use your xAI Grok OAuth (SuperGrok Subscription) credentials"
+            )
+            return
+        if existing_api_key:
+            _print_success("    xAI will use your existing XAI_API_KEY")
+            return
+
+        _print_info("    xAI needs credentials. Choose one:")
+        try:
+            from hermes_cli.setup import (
+                _run_xai_oauth_login_from_setup,
+                prompt_choice,
+                prompt as _setup_prompt,
+            )
+            from hermes_cli.config import save_env_value
+        except Exception as exc:
+            _print_warning(f"    Could not load setup helpers: {exc}")
+            _print_info("    Run later: hermes auth add xai-oauth   (or set XAI_API_KEY)")
+            return
+
+        idx = prompt_choice(
+            "    How do you want xAI to authenticate?",
+            choices=[
+                "Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
+                "Paste an xAI API key (console.x.ai)",
+                "Skip — configure later via `hermes auth add xai-oauth`",
+            ],
+            default=0,
+        )
+        if idx == 0:
+            if _run_xai_oauth_login_from_setup():
+                _print_success(
+                    "    Logged in — xAI will use these OAuth credentials"
+                )
+            else:
+                _print_warning(
+                    "    xAI Grok OAuth login did not complete. "
+                    "Run later: hermes auth add xai-oauth"
+                )
+        elif idx == 1:
+            api_key = _setup_prompt("    xAI API key", password=True)
+            if api_key:
+                save_env_value("XAI_API_KEY", api_key)
+                _print_success("    XAI_API_KEY saved")
+            else:
+                _print_warning(
+                    "    No API key provided. Run later: hermes auth add xai-oauth"
+                )
+        else:
+            _print_info("    xAI will remain inactive until credentials are configured.")
+
 
 # ─── Platform / Toolset Helpers ───────────────────────────────────────────────
 
diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py
index ea8721075d0..d5aac4eccdd 100644
--- a/plugins/image_gen/xai/__init__.py
+++ b/plugins/image_gen/xai/__init__.py
@@ -31,7 +31,7 @@ from agent.image_gen_provider import (
     save_b64_image,
     success_response,
 )
-from tools.xai_http import hermes_xai_user_agent
+from tools.xai_http import hermes_xai_user_agent, resolve_xai_http_credentials
 
 logger = logging.getLogger(__name__)
 
@@ -39,14 +39,17 @@ logger = logging.getLogger(__name__)
 # Model catalog
 # ---------------------------------------------------------------------------
 
-API_MODEL = "grok-imagine-image"
-
 _MODELS: Dict[str, Dict[str, Any]] = {
     "grok-imagine-image": {
         "display": "Grok Imagine Image",
         "speed": "~5-10s",
         "strengths": "Fast, high-quality",
     },
+    "grok-imagine-image-quality": {
+        "display": "Grok Imagine Image (Quality)",
+        "speed": "~10-20s",
+        "strengths": "Higher fidelity / detail; slower than the standard model.",
+    },
 }
 
 DEFAULT_MODEL = "grok-imagine-image"
@@ -127,7 +130,8 @@ class XAIImageGenProvider(ImageGenProvider):
         return "xAI (Grok)"
 
     def is_available(self) -> bool:
-        return bool(os.getenv("XAI_API_KEY"))
+        creds = resolve_xai_http_credentials()
+        return bool(creds.get("api_key"))
 
     def list_models(self) -> List[Dict[str, Any]]:
         return [
@@ -141,17 +145,16 @@ class XAIImageGenProvider(ImageGenProvider):
         ]
 
     def get_setup_schema(self) -> Dict[str, Any]:
+        # Auth resolution is delegated to the shared ``xai_grok`` post_setup
+        # hook (``hermes_cli/tools_config.py``); identical to the TTS / video
+        # gen entries so users see the same OAuth-or-API-key choice for every
+        # xAI service.
         return {
-            "name": "xAI (Grok)",
+            "name": "xAI Grok Imagine (image)",
             "badge": "paid",
-            "tag": "Native xAI image generation via grok-imagine-image",
-            "env_vars": [
-                {
-                    "key": "XAI_API_KEY",
-                    "prompt": "xAI API key",
-                    "url": "https://console.x.ai/",
-                },
-            ],
+            "tag": "grok-imagine-image — text-to-image; uses xAI Grok OAuth or XAI_API_KEY",
+            "env_vars": [],
+            "post_setup": "xai_grok",
         }
 
     def generate(
@@ -161,12 +164,14 @@ class XAIImageGenProvider(ImageGenProvider):
         **kwargs: Any,
     ) -> Dict[str, Any]:
         """Generate an image using xAI's grok-imagine-image."""
-        api_key = os.getenv("XAI_API_KEY", "").strip()
+        creds = resolve_xai_http_credentials()
+        api_key = str(creds.get("api_key") or "").strip()
+        provider_name = str(creds.get("provider") or "xai").strip() or "xai"
         if not api_key:
             return error_response(
-                error="XAI_API_KEY not set. Get one at https://console.x.ai/",
+                error="No xAI credentials found. Configure xAI OAuth in `hermes model` or set XAI_API_KEY.",
                 error_type="missing_api_key",
-                provider="xai",
+                provider=provider_name,
                 aspect_ratio=aspect_ratio,
             )
 
@@ -177,7 +182,7 @@ class XAIImageGenProvider(ImageGenProvider):
         xai_res = resolution if resolution in _XAI_RESOLUTIONS else DEFAULT_RESOLUTION
 
         payload: Dict[str, Any] = {
-            "model": API_MODEL,
+            "model": model_id,
             "prompt": prompt,
             "aspect_ratio": xai_ar,
             "resolution": xai_res,
@@ -189,7 +194,7 @@ class XAIImageGenProvider(ImageGenProvider):
             "User-Agent": hermes_xai_user_agent(),
         }
 
-        base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
+        base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/")
 
         try:
             response = requests.post(
@@ -210,7 +215,7 @@ class XAIImageGenProvider(ImageGenProvider):
             return error_response(
                 error=f"xAI image generation failed ({status}): {err_msg}",
                 error_type="api_error",
-                provider="xai",
+                provider=provider_name,
                 model=model_id,
                 prompt=prompt,
                 aspect_ratio=aspect,
@@ -219,7 +224,7 @@ class XAIImageGenProvider(ImageGenProvider):
             return error_response(
                 error="xAI image generation timed out (120s)",
                 error_type="timeout",
-                provider="xai",
+                provider=provider_name,
                 model=model_id,
                 prompt=prompt,
                 aspect_ratio=aspect,
@@ -228,7 +233,7 @@ class XAIImageGenProvider(ImageGenProvider):
             return error_response(
                 error=f"xAI connection error: {exc}",
                 error_type="connection_error",
-                provider="xai",
+                provider=provider_name,
                 model=model_id,
                 prompt=prompt,
                 aspect_ratio=aspect,
@@ -240,7 +245,7 @@ class XAIImageGenProvider(ImageGenProvider):
             return error_response(
                 error=f"xAI returned invalid JSON: {exc}",
                 error_type="invalid_response",
-                provider="xai",
+                provider=provider_name,
                 model=model_id,
                 prompt=prompt,
                 aspect_ratio=aspect,
@@ -252,7 +257,7 @@ class XAIImageGenProvider(ImageGenProvider):
             return error_response(
                 error="xAI returned no image data",
                 error_type="empty_response",
-                provider="xai",
+                provider=provider_name,
                 model=model_id,
                 prompt=prompt,
                 aspect_ratio=aspect,
diff --git a/plugins/video_gen/xai/__init__.py b/plugins/video_gen/xai/__init__.py
index b7421799044..d6fe9d04a7b 100644
--- a/plugins/video_gen/xai/__init__.py
+++ b/plugins/video_gen/xai/__init__.py
@@ -10,8 +10,12 @@ Originally salvaged from PR #10600 by @Jaaneek; reshaped into the
 :class:`VideoGenProvider` plugin interface and trimmed to the
 generate-only surface.
 
-Authentication via ``XAI_API_KEY``. Output is an HTTPS URL from xAI's
-CDN; the gateway downloads and delivers it.
+Authentication: xAI Grok OAuth tokens (preferred — billed against the
+user's SuperGrok subscription) or ``XAI_API_KEY``. Both routes are
+resolved through ``tools.xai_http.resolve_xai_http_credentials`` so a
+single login covers chat + TTS + image gen + video gen + transcription.
+Output is an HTTPS URL from xAI's CDN; the gateway downloads and
+delivers it.
 """
 
 from __future__ import annotations
@@ -20,7 +24,7 @@ import asyncio
 import logging
 import os
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import httpx
 
@@ -66,24 +70,44 @@ _MODELS: Dict[str, Dict[str, Any]] = {
 # ---------------------------------------------------------------------------
 
 
-def _xai_base_url() -> str:
-    return (os.getenv("XAI_BASE_URL") or DEFAULT_XAI_BASE_URL).strip().rstrip("/")
+def _resolve_xai_credentials() -> Tuple[str, str]:
+    """Return ``(api_key, base_url)`` from the shared xAI credential resolver.
+
+    Order: runtime provider (xai-oauth pool entry) → singleton ``auth.json``
+    OAuth tokens → ``XAI_API_KEY`` env var. ``api_key`` is empty when no
+    credential source is available; callers must check before using it.
+    """
+    try:
+        from tools.xai_http import resolve_xai_http_credentials
+
+        creds = resolve_xai_http_credentials() or {}
+    except Exception as exc:
+        logger.debug("xAI credential resolver failed: %s", exc)
+        creds = {}
+
+    api_key = str(creds.get("api_key") or os.getenv("XAI_API_KEY", "")).strip()
+    base_url = str(
+        creds.get("base_url")
+        or os.getenv("XAI_BASE_URL")
+        or DEFAULT_XAI_BASE_URL
+    ).strip().rstrip("/")
+    return api_key, base_url
 
 
-def _xai_headers() -> Dict[str, str]:
-    api_key = os.getenv("XAI_API_KEY", "").strip()
-    if not api_key:
-        raise ValueError("XAI_API_KEY not set. Get one at https://console.x.ai/")
+def _xai_user_agent() -> str:
     try:
         from tools.xai_http import hermes_xai_user_agent
 
-        ua = hermes_xai_user_agent()
+        return hermes_xai_user_agent()
     except Exception:
-        ua = "hermes-agent/video_gen"
+        return "hermes-agent/video_gen"
+
+
+def _xai_headers(api_key: str) -> Dict[str, str]:
     return {
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json",
-        "User-Agent": ua,
+        "User-Agent": _xai_user_agent(),
     }
 
 
@@ -110,12 +134,15 @@ def _clamp_duration(duration: Optional[int], has_reference_images: bool) -> int:
 async def _submit(
     client: httpx.AsyncClient,
     payload: Dict[str, Any],
+    *,
+    api_key: str,
+    base_url: str,
 ) -> str:
     """POST to /videos/generations — xAI's only public endpoint for our
     text-to-video and image-to-video surface."""
     response = await client.post(
-        f"{_xai_base_url()}/videos/generations",
-        headers={**_xai_headers(), "x-idempotency-key": str(uuid.uuid4())},
+        f"{base_url}/videos/generations",
+        headers={**_xai_headers(api_key), "x-idempotency-key": str(uuid.uuid4())},
         json=payload,
         timeout=60,
     )
@@ -131,6 +158,8 @@ async def _poll(
     client: httpx.AsyncClient,
     request_id: str,
     *,
+    api_key: str,
+    base_url: str,
     timeout_seconds: int,
     poll_interval: int,
 ) -> Dict[str, Any]:
@@ -138,8 +167,8 @@ async def _poll(
     last_status = "queued"
     while elapsed < timeout_seconds:
         response = await client.get(
-            f"{_xai_base_url()}/videos/{request_id}",
-            headers=_xai_headers(),
+            f"{base_url}/videos/{request_id}",
+            headers=_xai_headers(api_key),
             timeout=30,
         )
         response.raise_for_status()
@@ -174,7 +203,8 @@ class XAIVideoGenProvider(VideoGenProvider):
         return "xAI"
 
     def is_available(self) -> bool:
-        return bool(os.environ.get("XAI_API_KEY", "").strip())
+        api_key, _ = _resolve_xai_credentials()
+        return bool(api_key)
 
     def list_models(self) -> List[Dict[str, Any]]:
         return [{"id": mid, **meta} for mid, meta in _MODELS.items()]
@@ -183,17 +213,18 @@ class XAIVideoGenProvider(VideoGenProvider):
         return DEFAULT_MODEL
 
     def get_setup_schema(self) -> Dict[str, Any]:
+        # Auth resolution lives entirely in the shared ``xai_grok`` post_setup
+        # hook (``hermes_cli/tools_config.py``) so the picker doesn't blindly
+        # prompt for an API key when the user is already signed in via xAI
+        # Grok OAuth (SuperGrok Subscription) — TTS / image gen / video gen
+        # all share the same credential resolver. The hook offers an
+        # OAuth-vs-API-key choice when neither is configured.
         return {
-            "name": "xAI",
+            "name": "xAI Grok Imagine",
             "badge": "paid",
-            "tag": "grok-imagine-video — text-to-video & image-to-video with reference images",
-            "env_vars": [
-                {
-                    "key": "XAI_API_KEY",
-                    "prompt": "xAI API key",
-                    "url": "https://console.x.ai/",
-                },
-            ],
+            "tag": "grok-imagine-video — text-to-video & image-to-video; uses xAI Grok OAuth or XAI_API_KEY",
+            "env_vars": [],
+            "post_setup": "xai_grok",
         }
 
     def capabilities(self) -> Dict[str, Any]:
@@ -259,9 +290,14 @@ class XAIVideoGenProvider(VideoGenProvider):
         aspect_ratio: str,
         resolution: str,
     ) -> Dict[str, Any]:
-        if not os.environ.get("XAI_API_KEY", "").strip():
+        api_key, base_url = _resolve_xai_credentials()
+        if not api_key:
             return error_response(
-                error="XAI_API_KEY not set. Get one at https://console.x.ai/",
+                error=(
+                    "No xAI credentials found. Sign in via `hermes auth add xai-oauth` "
+                    "(SuperGrok subscription) or set XAI_API_KEY from "
+                    "https://console.x.ai/."
+                ),
                 error_type="auth_required",
                 provider="xai", prompt=prompt,
             )
@@ -317,7 +353,9 @@ class XAIVideoGenProvider(VideoGenProvider):
 
         async with httpx.AsyncClient() as client:
             try:
-                request_id = await _submit(client, payload)
+                request_id = await _submit(
+                    client, payload, api_key=api_key, base_url=base_url
+                )
             except httpx.HTTPStatusError as exc:
                 detail = ""
                 try:
@@ -334,6 +372,7 @@ class XAIVideoGenProvider(VideoGenProvider):
 
             poll_result = await _poll(
                 client, request_id,
+                api_key=api_key, base_url=base_url,
                 timeout_seconds=DEFAULT_TIMEOUT_SECONDS,
                 poll_interval=DEFAULT_POLL_INTERVAL_SECONDS,
             )
diff --git a/run_agent.py b/run_agent.py
index a4df8749777..a82c6417ae1 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1275,7 +1275,7 @@ class AIAgent:
             self.api_mode = api_mode
         elif self.provider == "openai-codex":
             self.api_mode = "codex_responses"
-        elif self.provider == "xai":
+        elif self.provider in {"xai", "xai-oauth"}:
             self.api_mode = "codex_responses"
         elif (provider_name is None) and (
             self._base_url_hostname == "chatgpt.com"
@@ -7139,15 +7139,60 @@ class AIAgent:
         raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
 
     def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
-        if self.api_mode != "codex_responses" or self.provider != "openai-codex":
+        if self.api_mode != "codex_responses" or self.provider not in {"openai-codex", "xai-oauth"}:
+            return False
+
+        # Guard against silent account swap.
+        #
+        # When an agent is using a non-singleton credential — e.g. a manual
+        # pool entry (``hermes auth add xai-oauth``) whose tokens belong to
+        # a different account than the loopback_pkce singleton, or an agent
+        # constructed with an explicit ``api_key=`` arg — force-refreshing
+        # the singleton here and adopting its tokens silently re-routes the
+        # rest of the conversation onto the singleton's account.  The
+        # credential pool's reactive recovery (``_recover_with_credential_pool``)
+        # is the right channel for that case; this path is the
+        # singleton-only fallback used when the pool can't recover, and
+        # MUST only fire when the agent really is on singleton tokens.
+        try:
+            if self.provider == "openai-codex":
+                from hermes_cli.auth import resolve_codex_runtime_credentials
+
+                singleton_now = resolve_codex_runtime_credentials(
+                    refresh_if_expiring=False,
+                )
+            else:
+                from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
+
+                singleton_now = resolve_xai_oauth_runtime_credentials(
+                    refresh_if_expiring=False,
+                )
+        except Exception as exc:
+            logger.debug("%s singleton read failed: %s", self.provider, exc)
+            return False
+
+        singleton_key = str(singleton_now.get("api_key") or "").strip()
+        active_key = str(self.api_key or "").strip()
+        if singleton_key and active_key and singleton_key != active_key:
+            logger.debug(
+                "%s singleton tokens differ from the active api_key; "
+                "skipping singleton force-refresh to avoid silent account swap. "
+                "Reactive credential rotation should go through the pool.",
+                self.provider,
+            )
             return False
 
         try:
-            from hermes_cli.auth import resolve_codex_runtime_credentials
+            if self.provider == "openai-codex":
+                from hermes_cli.auth import resolve_codex_runtime_credentials
 
-            creds = resolve_codex_runtime_credentials(force_refresh=force)
+                creds = resolve_codex_runtime_credentials(force_refresh=force)
+            else:
+                from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
+
+                creds = resolve_xai_oauth_runtime_credentials(force_refresh=force)
         except Exception as exc:
-            logger.debug("Codex credential refresh failed: %s", exc)
+            logger.debug("%s credential refresh failed: %s", self.provider, exc)
             return False
 
         api_key = creds.get("api_key")
@@ -7162,7 +7207,7 @@ class AIAgent:
         self._client_kwargs["api_key"] = self.api_key
         self._client_kwargs["base_url"] = self.base_url
 
-        if not self._replace_primary_openai_client(reason="codex_credential_refresh"):
+        if not self._replace_primary_openai_client(reason=f"{self.provider}_credential_refresh"):
             return False
 
         return True
@@ -9631,7 +9676,7 @@ class AIAgent:
                     and "/backend-api/codex" in self._base_url_lower
                 )
             )
-            is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai"
+            is_xai_responses = self.provider in {"xai", "xai-oauth"} or self._base_url_hostname == "api.x.ai"
             _msgs_for_codex = self._prepare_messages_for_non_vision_model(api_messages)
             return _ct.build_kwargs(
                 model=self.model,
@@ -13700,13 +13745,14 @@ class AIAgent:
 
                     if (
                         self.api_mode == "codex_responses"
-                        and self.provider == "openai-codex"
+                        and self.provider in {"openai-codex", "xai-oauth"}
                         and status_code == 401
                         and not codex_auth_retry_attempted
                     ):
                         codex_auth_retry_attempted = True
                         if self._try_refresh_codex_client_credentials(force=True):
-                            self._vprint(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
+                            _label = "xAI OAuth" if self.provider == "xai-oauth" else "Codex"
+                            self._vprint(f"{self.log_prefix}🔐 {_label} auth refreshed after 401. Retrying request...")
                             continue
                     if (
                         self.api_mode == "chat_completions"
@@ -14346,11 +14392,15 @@ class AIAgent:
                         self._vprint(f"{self.log_prefix}   🌐 Endpoint: {_base}", force=True)
                         # Actionable guidance for common auth errors
                         if classified.is_auth or classified.reason == FailoverReason.billing:
-                            if _provider == "openai-codex" and status_code == 401:
-                                self._vprint(f"{self.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
-                                self._vprint(f"{self.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
-                                self._vprint(f"{self.log_prefix}      1. Run `codex` in your terminal to generate fresh tokens.", force=True)
-                                self._vprint(f"{self.log_prefix}      2. Then run `hermes auth` to re-authenticate.", force=True)
+                            if _provider in {"openai-codex", "xai-oauth"} and status_code == 401:
+                                if _provider == "openai-codex":
+                                    self._vprint(f"{self.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
+                                    self._vprint(f"{self.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
+                                    self._vprint(f"{self.log_prefix}      1. Run `codex` in your terminal to generate fresh tokens.", force=True)
+                                    self._vprint(f"{self.log_prefix}      2. Then run `hermes auth` to re-authenticate.", force=True)
+                                else:
+                                    self._vprint(f"{self.log_prefix}   💡 xAI OAuth token was rejected (HTTP 401). To fix:", force=True)
+                                    self._vprint(f"{self.log_prefix}      re-authenticate with xAI Grok OAuth (SuperGrok Subscription) from `hermes model`.", force=True)
                             else:
                                 self._vprint(f"{self.log_prefix}   💡 Your API key was rejected by the provider. Check:", force=True)
                                 self._vprint(f"{self.log_prefix}      • Is the key valid? Run: hermes setup", force=True)
diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py
index 6a4cda173ad..7100e8ac17d 100644
--- a/tests/agent/transports/test_codex_transport.py
+++ b/tests/agent/transports/test_codex_transport.py
@@ -100,6 +100,49 @@ class TestCodexBuildKwargs:
         )
         assert "prompt_cache_key" not in kw
 
+    def test_xai_responses_sends_cache_key_via_extra_body(self, transport):
+        """xAI's Responses API documents ``prompt_cache_key`` as the
+        body-level cache-routing key (the ``x-grok-conv-id`` header is
+        Chat-Completions-only). Passing it via ``extra_body`` is robust
+        against openai SDK builds whose ``Responses.stream()`` kwarg
+        signature ever drops the field — the body field still serializes
+        and reaches xAI either way. The ``x-grok-conv-id`` header is kept
+        as a belt-and-braces fallback so cache routing survives even
+        when the body field would be stripped by an intermediate proxy.
+        Ref: https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits
+        """
+        messages = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="grok-4.3", messages=messages, tools=[],
+            session_id="conv-xai-1",
+            is_xai_responses=True,
+        )
+        # Top-level prompt_cache_key must NOT be set for xAI — the SDK
+        # signature drop is what motivated the extra_body indirection in
+        # the first place. The cache-routing field must travel in the
+        # body via extra_body.
+        assert "prompt_cache_key" not in kw
+        assert kw.get("extra_body", {}).get("prompt_cache_key") == "conv-xai-1"
+        # Header kept as belt-and-braces.
+        assert kw.get("extra_headers", {}).get("x-grok-conv-id") == "conv-xai-1"
+
+    def test_xai_responses_extra_body_preserves_caller_fields(self, transport):
+        """When the caller already supplies ``extra_body`` (e.g. via
+        request_overrides), the xAI cache-key injection must merge into
+        the existing dict instead of overwriting it. Caller-supplied
+        ``prompt_cache_key`` wins (setdefault semantics) so user overrides
+        aren't silently clobbered by the transport."""
+        messages = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="grok-4.3", messages=messages, tools=[],
+            session_id="conv-xai-1",
+            is_xai_responses=True,
+            request_overrides={"extra_body": {"prompt_cache_key": "caller-override", "other_field": 42}},
+        )
+        eb = kw.get("extra_body", {})
+        assert eb.get("prompt_cache_key") == "caller-override"
+        assert eb.get("other_field") == 42
+
     def test_max_tokens(self, transport):
         messages = [{"role": "user", "content": "Hi"}]
         kw = transport.build_kwargs(
diff --git a/tests/hermes_cli/test_auth_xai_oauth_provider.py b/tests/hermes_cli/test_auth_xai_oauth_provider.py
new file mode 100644
index 00000000000..9f1cc55f57e
--- /dev/null
+++ b/tests/hermes_cli/test_auth_xai_oauth_provider.py
@@ -0,0 +1,1605 @@
+"""Tests for xAI Grok OAuth — tokens stored in Hermes auth store (~/.hermes/auth.json)."""
+
+import base64
+import json
+import time
+from pathlib import Path
+
+import pytest
+
+from hermes_cli.auth import (
+    AuthError,
+    DEFAULT_XAI_OAUTH_BASE_URL,
+    PROVIDER_REGISTRY,
+    XAI_OAUTH_CLIENT_ID,
+    XAI_OAUTH_REDIRECT_HOST,
+    XAI_OAUTH_REDIRECT_PATH,
+    XAI_OAUTH_SCOPE,
+    _read_xai_oauth_tokens,
+    _save_xai_oauth_tokens,
+    _xai_access_token_is_expiring,
+    _xai_callback_cors_origin,
+    _xai_oauth_build_authorize_url,
+    _xai_validate_loopback_redirect_uri,
+    get_xai_oauth_auth_status,
+    refresh_xai_oauth_pure,
+    resolve_provider,
+    resolve_xai_oauth_runtime_credentials,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _setup_hermes_auth(
+    hermes_home: Path,
+    *,
+    access_token: str = "access",
+    refresh_token: str = "refresh",
+    discovery: dict | None = None,
+):
+    """Write xAI OAuth tokens into the Hermes auth store at the given root."""
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    state = {
+        "tokens": {
+            "access_token": access_token,
+            "refresh_token": refresh_token,
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+        },
+        "last_refresh": "2026-05-14T00:00:00Z",
+        "auth_mode": "oauth_pkce",
+    }
+    if discovery is not None:
+        state["discovery"] = discovery
+    auth_store = {
+        "version": 1,
+        "active_provider": "xai-oauth",
+        "providers": {"xai-oauth": state},
+    }
+    auth_file = hermes_home / "auth.json"
+    auth_file.write_text(json.dumps(auth_store, indent=2))
+    return auth_file
+
+
+def _jwt_with_exp(exp_epoch: int) -> str:
+    """Build a minimal JWT-shaped string with the given exp claim."""
+    payload = {"exp": exp_epoch}
+    encoded = (
+        base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8"))
+        .rstrip(b"=")
+        .decode("utf-8")
+    )
+    return f"h.{encoded}.s"
+
+
+class _StubHTTPResponse:
+    def __init__(self, status_code: int, payload):
+        self.status_code = status_code
+        self._payload = payload
+        self.text = json.dumps(payload) if isinstance(payload, (dict, list)) else str(payload)
+
+    def json(self):
+        if isinstance(self._payload, Exception):
+            raise self._payload
+        return self._payload
+
+
+class _StubHTTPClient:
+    def __init__(self, response):
+        self._response = response
+        self.last_call = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        return False
+
+    def post(self, *args, **kwargs):
+        self.last_call = ("post", args, kwargs)
+        return self._response
+
+
+def _patch_httpx_client(monkeypatch, response):
+    holder = {"client": None}
+
+    def _factory(*args, **kwargs):
+        client = _StubHTTPClient(response)
+        holder["client"] = client
+        return client
+
+    monkeypatch.setattr("hermes_cli.auth.httpx.Client", _factory)
+    return holder
+
+
+# ---------------------------------------------------------------------------
+# Constants and registry
+# ---------------------------------------------------------------------------
+
+
+def test_xai_oauth_provider_registered():
+    assert "xai-oauth" in PROVIDER_REGISTRY
+    pconfig = PROVIDER_REGISTRY["xai-oauth"]
+    assert pconfig.id == "xai-oauth"
+    assert pconfig.auth_type == "oauth_external"
+    assert pconfig.inference_base_url == DEFAULT_XAI_OAUTH_BASE_URL
+
+
+def test_resolve_provider_normalizes_xai_oauth_aliases():
+    assert resolve_provider("xai-oauth") == "xai-oauth"
+    assert resolve_provider("grok-oauth") == "xai-oauth"
+    assert resolve_provider("x-ai-oauth") == "xai-oauth"
+    assert resolve_provider("xai-grok-oauth") == "xai-oauth"
+
+
+# ---------------------------------------------------------------------------
+# JWT expiry detection
+# ---------------------------------------------------------------------------
+
+
+def test_xai_access_token_is_expiring_returns_true_for_expired_jwt():
+    expired = _jwt_with_exp(int(time.time()) - 60)
+    assert _xai_access_token_is_expiring(expired, 0) is True
+
+
+def test_xai_access_token_is_expiring_returns_false_for_fresh_jwt():
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    assert _xai_access_token_is_expiring(fresh, 0) is False
+
+
+def test_xai_access_token_is_expiring_honors_skew_window():
+    near = _jwt_with_exp(int(time.time()) + 30)
+    assert _xai_access_token_is_expiring(near, 60) is True
+    assert _xai_access_token_is_expiring(near, 0) is False
+
+
+def test_xai_access_token_is_expiring_returns_false_for_non_jwt():
+    assert _xai_access_token_is_expiring("not.a.jwt.but.has.dots", 0) is False
+    assert _xai_access_token_is_expiring("opaque-token-no-dots", 0) is False
+    assert _xai_access_token_is_expiring("", 0) is False
+    assert _xai_access_token_is_expiring(None, 0) is False  # type: ignore[arg-type]
+
+
+def test_xai_access_token_is_expiring_returns_false_for_jwt_without_exp():
+    payload = {"sub": "user"}
+    encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode()
+    token = f"h.{encoded}.s"
+    assert _xai_access_token_is_expiring(token, 0) is False
+
+
+# ---------------------------------------------------------------------------
+# Loopback redirect URI validation
+# ---------------------------------------------------------------------------
+
+
+def test_xai_validate_loopback_redirect_uri_accepts_localhost_with_port():
+    host, port, path = _xai_validate_loopback_redirect_uri(
+        "http://127.0.0.1:56121/callback"
+    )
+    assert host == XAI_OAUTH_REDIRECT_HOST
+    assert port == 56121
+    assert path == XAI_OAUTH_REDIRECT_PATH
+
+
+def test_xai_validate_loopback_redirect_uri_rejects_https():
+    with pytest.raises(AuthError) as exc:
+        _xai_validate_loopback_redirect_uri("https://127.0.0.1:56121/callback")
+    assert exc.value.code == "xai_redirect_invalid"
+
+
+def test_xai_validate_loopback_redirect_uri_rejects_non_loopback():
+    with pytest.raises(AuthError) as exc:
+        _xai_validate_loopback_redirect_uri("http://example.com:56121/callback")
+    assert exc.value.code == "xai_redirect_invalid"
+
+
+def test_xai_validate_loopback_redirect_uri_rejects_missing_port():
+    with pytest.raises(AuthError) as exc:
+        _xai_validate_loopback_redirect_uri("http://127.0.0.1/callback")
+    assert exc.value.code == "xai_redirect_invalid"
+
+
+# ---------------------------------------------------------------------------
+# Authorize URL construction
+# ---------------------------------------------------------------------------
+
+
+def _parse_authorize_url(url: str) -> dict:
+    from urllib.parse import urlparse, parse_qs
+
+    parsed = urlparse(url)
+    return {k: v[0] for k, v in parse_qs(parsed.query).items()}
+
+
+def test_xai_oauth_authorize_url_includes_plan_generic():
+    """Regression: accounts.x.ai requires `plan=generic` for loopback OAuth on
+    non-allowlisted clients. Must always be present on the authorize URL."""
+    url = _xai_oauth_build_authorize_url(
+        authorization_endpoint="https://auth.x.ai/oauth2/authorize",
+        redirect_uri="http://127.0.0.1:56121/callback",
+        code_challenge="challenge-xyz",
+        state="state-abc",
+        nonce="nonce-def",
+    )
+    params = _parse_authorize_url(url)
+    assert params["plan"] == "generic"
+
+
+def test_xai_oauth_authorize_url_includes_referrer_hermes_agent():
+    """Attribution: xAI's OAuth server can identify Hermes-originated logins
+    via the referrer query param. Must always be present on the authorize URL."""
+    url = _xai_oauth_build_authorize_url(
+        authorization_endpoint="https://auth.x.ai/oauth2/authorize",
+        redirect_uri="http://127.0.0.1:56121/callback",
+        code_challenge="challenge-xyz",
+        state="state-abc",
+        nonce="nonce-def",
+    )
+    params = _parse_authorize_url(url)
+    assert params["referrer"] == "hermes-agent"
+
+
+def test_xai_oauth_authorize_url_includes_pkce_and_oidc_params():
+    url = _xai_oauth_build_authorize_url(
+        authorization_endpoint="https://auth.x.ai/oauth2/authorize",
+        redirect_uri="http://127.0.0.1:56121/callback",
+        code_challenge="challenge-xyz",
+        state="state-abc",
+        nonce="nonce-def",
+    )
+    params = _parse_authorize_url(url)
+    assert params["response_type"] == "code"
+    assert params["client_id"] == XAI_OAUTH_CLIENT_ID
+    assert params["redirect_uri"] == "http://127.0.0.1:56121/callback"
+    assert params["scope"] == XAI_OAUTH_SCOPE
+    assert params["code_challenge"] == "challenge-xyz"
+    assert params["code_challenge_method"] == "S256"
+    assert params["state"] == "state-abc"
+    assert params["nonce"] == "nonce-def"
+
+
+# ---------------------------------------------------------------------------
+# CORS allowlist
+# ---------------------------------------------------------------------------
+
+
+def test_xai_callback_cors_origin_allowlist():
+    assert _xai_callback_cors_origin("https://accounts.x.ai") == "https://accounts.x.ai"
+    assert _xai_callback_cors_origin("https://auth.x.ai") == "https://auth.x.ai"
+
+
+def test_xai_callback_cors_origin_rejects_unknown_origin():
+    assert _xai_callback_cors_origin("https://attacker.example.com") == ""
+    assert _xai_callback_cors_origin(None) == ""
+    assert _xai_callback_cors_origin("") == ""
+
+
+# ---------------------------------------------------------------------------
+# Token roundtrip + reads
+# ---------------------------------------------------------------------------
+
+
+def test_save_and_read_xai_oauth_tokens_roundtrip(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    _save_xai_oauth_tokens(
+        {
+            "access_token": "at-1",
+            "refresh_token": "rt-1",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+        },
+        discovery={"token_endpoint": "https://auth.x.ai/oauth2/token"},
+        redirect_uri="http://127.0.0.1:56121/callback",
+    )
+    data = _read_xai_oauth_tokens()
+    assert data["tokens"]["access_token"] == "at-1"
+    assert data["tokens"]["refresh_token"] == "rt-1"
+    assert data["redirect_uri"] == "http://127.0.0.1:56121/callback"
+    assert data["discovery"]["token_endpoint"] == "https://auth.x.ai/oauth2/token"
+
+
+def test_read_xai_oauth_tokens_missing(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    with pytest.raises(AuthError) as exc:
+        _read_xai_oauth_tokens()
+    assert exc.value.code == "xai_auth_missing"
+    assert exc.value.relogin_required is True
+
+
+def test_read_xai_oauth_tokens_missing_access_token(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    _setup_hermes_auth(hermes_home, access_token="")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    with pytest.raises(AuthError) as exc:
+        _read_xai_oauth_tokens()
+    assert exc.value.code == "xai_auth_missing_access_token"
+    assert exc.value.relogin_required is True
+
+
+def test_read_xai_oauth_tokens_missing_refresh_token(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    _setup_hermes_auth(hermes_home, refresh_token="")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    with pytest.raises(AuthError) as exc:
+        _read_xai_oauth_tokens()
+    assert exc.value.code == "xai_auth_missing_refresh_token"
+    assert exc.value.relogin_required is True
+
+
+# ---------------------------------------------------------------------------
+# Runtime credential resolution
+# ---------------------------------------------------------------------------
+
+
+def test_resolve_xai_runtime_credentials_returns_singleton_state(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False)
+    monkeypatch.delenv("XAI_BASE_URL", raising=False)
+
+    creds = resolve_xai_oauth_runtime_credentials()
+    assert creds["provider"] == "xai-oauth"
+    assert creds["api_key"] == fresh
+    assert creds["base_url"] == DEFAULT_XAI_OAUTH_BASE_URL
+    assert creds["source"] == "hermes-auth-store"
+    assert creds["auth_mode"] == "oauth_pkce"
+
+
+def test_resolve_xai_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    expiring = _jwt_with_exp(int(time.time()) - 10)
+    _setup_hermes_auth(
+        hermes_home,
+        access_token=expiring,
+        refresh_token="rt-old",
+        discovery={"token_endpoint": "https://auth.x.ai/oauth2/token"},
+    )
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+    called = {"count": 0}
+
+    def _fake_refresh(tokens, **kwargs):
+        called["count"] += 1
+        updated = dict(tokens)
+        updated["access_token"] = new_access
+        updated["refresh_token"] = "rt-new"
+        return updated
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_xai_oauth_tokens", _fake_refresh)
+
+    creds = resolve_xai_oauth_runtime_credentials()
+    assert called["count"] == 1
+    assert creds["api_key"] == new_access
+
+
+def test_resolve_xai_runtime_credentials_force_refresh(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(
+        hermes_home,
+        access_token=fresh,
+        discovery={"token_endpoint": "https://auth.x.ai/oauth2/token"},
+    )
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    forced = _jwt_with_exp(int(time.time()) + 7200)
+    called = {"count": 0}
+
+    def _fake_refresh(tokens, **kwargs):
+        called["count"] += 1
+        updated = dict(tokens)
+        updated["access_token"] = forced
+        return updated
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_xai_oauth_tokens", _fake_refresh)
+
+    creds = resolve_xai_oauth_runtime_credentials(force_refresh=True, refresh_if_expiring=False)
+    assert called["count"] == 1
+    assert creds["api_key"] == forced
+
+
+def test_resolve_xai_runtime_credentials_honours_env_base_url(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.setenv("HERMES_XAI_BASE_URL", "https://custom.x.ai/v1/")
+
+    creds = resolve_xai_oauth_runtime_credentials()
+    assert creds["base_url"] == "https://custom.x.ai/v1"
+
+
+# ---------------------------------------------------------------------------
+# Auth status surface
+# ---------------------------------------------------------------------------
+
+
+def test_get_xai_oauth_auth_status_logged_in_via_singleton(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    status = get_xai_oauth_auth_status()
+    assert status["logged_in"] is True
+    assert status["api_key"] == fresh
+    assert status["auth_mode"] == "oauth_pkce"
+
+
+def test_get_xai_oauth_auth_status_logged_out(tmp_path, monkeypatch):
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    status = get_xai_oauth_auth_status()
+    assert status["logged_in"] is False
+    assert "error" in status
+
+
+# ---------------------------------------------------------------------------
+# refresh_xai_oauth_pure error handling
+# ---------------------------------------------------------------------------
+
+
+def test_refresh_xai_oauth_pure_requires_refresh_token():
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure("at", "")
+    assert exc.value.code == "xai_auth_missing_refresh_token"
+    assert exc.value.relogin_required is True
+
+
+def test_refresh_xai_oauth_pure_relogin_on_400(monkeypatch):
+    response = _StubHTTPResponse(400, {"error": "invalid_grant"})
+    _patch_httpx_client(monkeypatch, response)
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure(
+            "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token"
+        )
+    assert exc.value.code == "xai_refresh_failed"
+    assert exc.value.relogin_required is True
+
+
+def test_refresh_xai_oauth_pure_no_relogin_on_500(monkeypatch):
+    response = _StubHTTPResponse(503, "service unavailable")
+    _patch_httpx_client(monkeypatch, response)
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure(
+            "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token"
+        )
+    assert exc.value.code == "xai_refresh_failed"
+    assert exc.value.relogin_required is False
+
+
+def test_refresh_xai_oauth_pure_returns_updated_tokens(monkeypatch):
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+    response = _StubHTTPResponse(
+        200,
+        {
+            "access_token": new_access,
+            "refresh_token": "rt-rotated",
+            "id_token": "id-1",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+        },
+    )
+    holder = _patch_httpx_client(monkeypatch, response)
+
+    updated = refresh_xai_oauth_pure(
+        "at", "rt-old", token_endpoint="https://auth.x.ai/oauth2/token"
+    )
+    assert updated["access_token"] == new_access
+    assert updated["refresh_token"] == "rt-rotated"
+    assert updated["id_token"] == "id-1"
+    assert updated["token_type"] == "Bearer"
+    assert updated["last_refresh"].endswith("Z")
+    client = holder["client"]
+    assert client is not None
+    _method, _args, kwargs = client.last_call
+    assert kwargs["data"]["grant_type"] == "refresh_token"
+    assert kwargs["data"]["refresh_token"] == "rt-old"
+    assert kwargs["data"]["client_id"] == XAI_OAUTH_CLIENT_ID
+
+
+def test_refresh_xai_oauth_pure_keeps_refresh_token_when_response_omits_it(monkeypatch):
+    """Some OAuth providers don't rotate refresh tokens — preserve the old one."""
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+    response = _StubHTTPResponse(
+        200,
+        {
+            "access_token": new_access,
+            "expires_in": 3600,
+            "token_type": "Bearer",
+        },
+    )
+    _patch_httpx_client(monkeypatch, response)
+
+    updated = refresh_xai_oauth_pure(
+        "at", "rt-stable", token_endpoint="https://auth.x.ai/oauth2/token"
+    )
+    assert updated["access_token"] == new_access
+    assert updated["refresh_token"] == "rt-stable"
+
+
+def test_refresh_xai_oauth_pure_rejects_response_without_access_token(monkeypatch):
+    response = _StubHTTPResponse(
+        200,
+        {"refresh_token": "rt-new", "expires_in": 3600},
+    )
+    _patch_httpx_client(monkeypatch, response)
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure(
+            "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token"
+        )
+    assert exc.value.code == "xai_refresh_missing_access_token"
+    assert exc.value.relogin_required is True
+
+
+def test_refresh_xai_oauth_pure_raises_typed_error_on_malformed_json(monkeypatch):
+    """xAI returning HTTP 200 with a non-JSON body (captive portal, proxy
+    error page, etc.) must surface a typed AuthError, not a raw
+    ``json.JSONDecodeError`` traceback. Matches the qwen-oauth precedent
+    so the upstream UX layer (``format_auth_error``) can map the failure."""
+    response = _StubHTTPResponse(200, ValueError("not json"))
+    response.text = "<html>captive portal</html>"
+    _patch_httpx_client(monkeypatch, response)
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure(
+            "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token"
+        )
+    assert exc.value.code == "xai_refresh_invalid_json"
+
+
+def test_xai_oauth_discovery_raises_typed_error_on_malformed_json(monkeypatch):
+    """Discovery is a cold-start, one-time fetch.  If the response is HTTP
+    200 with a non-JSON body (corporate proxy / captive portal returning
+    HTML), surface a typed AuthError rather than letting the
+    ``json.JSONDecodeError`` escape — so the message reads as an auth
+    problem instead of an internal parsing crash."""
+    from hermes_cli.auth import _xai_oauth_discovery
+
+    class _BadJSON:
+        status_code = 200
+
+        def json(self):
+            raise ValueError("Expecting value: line 1 column 1 (char 0)")
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.httpx.get",
+        lambda *a, **kw: _BadJSON(),
+    )
+    with pytest.raises(AuthError) as exc:
+        _xai_oauth_discovery()
+    assert exc.value.code == "xai_discovery_invalid_json"
+
+
+def test_xai_oauth_discovery_raises_typed_error_on_non_object_payload(monkeypatch):
+    """A discovery body that decodes as JSON but isn't an object (e.g. a
+    bare string or array) must not slip through and trigger an
+    ``AttributeError`` on ``payload.get(...)`` later.  Reject loudly
+    with the same incomplete-response code the missing-endpoint path uses."""
+    from hermes_cli.auth import _xai_oauth_discovery
+
+    class _StubResponse:
+        status_code = 200
+
+        def json(self):
+            return ["not", "an", "object"]
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.httpx.get",
+        lambda *a, **kw: _StubResponse(),
+    )
+    with pytest.raises(AuthError) as exc:
+        _xai_oauth_discovery()
+    assert exc.value.code == "xai_discovery_incomplete"
+
+
+# ---------------------------------------------------------------------------
+# OIDC discovery endpoint origin/scheme validation (MITM hardening)
+# ---------------------------------------------------------------------------
+
+
+def test_refresh_xai_oauth_pure_rejects_non_https_token_endpoint(monkeypatch):
+    """A poisoned auth.json (from MITM during initial discovery, or an older
+    Hermes that didn't validate) must not be silently honored on the refresh
+    hot path. A non-HTTPS ``token_endpoint`` would leak the refresh_token in
+    cleartext on every refresh; refuse before the POST."""
+    # No HTTP stub installed — refresh must fail at validation, not at POST.
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure(
+            "at", "rt", token_endpoint="http://auth.x.ai/oauth2/token"
+        )
+    assert exc.value.code == "xai_discovery_invalid"
+
+
+def test_refresh_xai_oauth_pure_rejects_off_origin_token_endpoint(monkeypatch):
+    """Pin the cached token_endpoint host to the xAI origin. A one-time MITM
+    during discovery could persist a token_endpoint on attacker-controlled
+    infrastructure — every subsequent refresh would silently leak the
+    refresh_token to that attacker. Refuse off-origin endpoints loudly so
+    the user can re-run discovery."""
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure(
+            "at", "rt", token_endpoint="https://evil.example.com/token"
+        )
+    assert exc.value.code == "xai_discovery_invalid"
+
+
+def test_refresh_xai_oauth_pure_rejects_lookalike_suffix(monkeypatch):
+    """Substring confusion: ``evil-x.ai`` ends in ``x.ai`` but is NOT a
+    ``.x.ai`` subdomain. The validator must enforce the leading-dot suffix
+    so attacker-registered apex lookalikes can't slip through."""
+    with pytest.raises(AuthError) as exc:
+        refresh_xai_oauth_pure(
+            "at", "rt", token_endpoint="https://evilx.ai/token"
+        )
+    assert exc.value.code == "xai_discovery_invalid"
+
+
+def test_refresh_xai_oauth_pure_accepts_apex_and_subdomain_endpoints(monkeypatch):
+    """The validator must accept BOTH the bare xAI apex (``x.ai``) and any
+    ``*.x.ai`` subdomain (e.g. ``auth.x.ai`` today, future migrations to
+    ``accounts.x.ai`` etc.). Without subdomain support we'd lock the
+    integration to whatever xAI happens to use today."""
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+    response = _StubHTTPResponse(
+        200,
+        {"access_token": new_access, "expires_in": 3600, "token_type": "Bearer"},
+    )
+    _patch_httpx_client(monkeypatch, response)
+    # auth.x.ai (current production)
+    updated = refresh_xai_oauth_pure(
+        "at", "rt", token_endpoint="https://auth.x.ai/oauth2/token"
+    )
+    assert updated["access_token"] == new_access
+    # hypothetical migration to accounts.x.ai
+    _patch_httpx_client(monkeypatch, response)
+    updated2 = refresh_xai_oauth_pure(
+        "at", "rt", token_endpoint="https://accounts.x.ai/token"
+    )
+    assert updated2["access_token"] == new_access
+
+
+def test_xai_oauth_discovery_validates_endpoints(monkeypatch):
+    """The discovery response itself goes through endpoint validation, so a
+    one-time MITM during initial login cannot poison ``auth.json`` with an
+    attacker-controlled ``token_endpoint``. (The persistence is what makes
+    this attack worth defending against — one MITM = forever credential
+    leak.)"""
+    from hermes_cli.auth import _xai_oauth_discovery
+
+    class _StubGetResponse:
+        status_code = 200
+
+        def __init__(self, payload):
+            self._payload = payload
+
+        def json(self):
+            return self._payload
+
+    def _fake_get(url, headers=None, timeout=None):
+        return _StubGetResponse({
+            "authorization_endpoint": "https://auth.x.ai/oauth2/authorize",
+            "token_endpoint": "https://evil.example.com/token",  # poisoned
+        })
+
+    monkeypatch.setattr("hermes_cli.auth.httpx.get", _fake_get)
+    with pytest.raises(AuthError) as exc:
+        _xai_oauth_discovery()
+    assert exc.value.code == "xai_discovery_invalid"
+
+
+def test_xai_oauth_discovery_validates_authorization_endpoint(monkeypatch):
+    """A poisoned ``authorization_endpoint`` is just as dangerous as a
+    poisoned ``token_endpoint``: it sends the user's browser (with their
+    logged-in xAI session cookies) to attacker infrastructure that can
+    phish the consent screen and exchange a stolen authorization code.
+
+    Both endpoints must be validated independently. This test pins the
+    parity so nobody can later "optimise" by validating only the token
+    endpoint and silently lose authorization-endpoint defense."""
+    from hermes_cli.auth import _xai_oauth_discovery
+
+    class _StubGetResponse:
+        status_code = 200
+
+        def __init__(self, payload):
+            self._payload = payload
+
+        def json(self):
+            return self._payload
+
+    def _fake_get(url, headers=None, timeout=None):
+        return _StubGetResponse({
+            "authorization_endpoint": "https://evil.example.com/authorize",  # poisoned
+            "token_endpoint": "https://auth.x.ai/oauth2/token",
+        })
+
+    monkeypatch.setattr("hermes_cli.auth.httpx.get", _fake_get)
+    with pytest.raises(AuthError) as exc:
+        _xai_oauth_discovery()
+    assert exc.value.code == "xai_discovery_invalid"
+
+
+# ---------------------------------------------------------------------------
+# Pool seeding from singleton
+# ---------------------------------------------------------------------------
+
+
+def test_credential_pool_seeds_xai_oauth_from_singleton(tmp_path, monkeypatch):
+    """After `hermes model` -> xai-oauth, the singleton holds tokens.  load_pool
+    must surface that as a pool entry so `hermes auth list` reflects truth and
+    refreshes route through the pool consistently with codex."""
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh, refresh_token="rt-1")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    pool = load_pool("xai-oauth")
+    assert pool.has_credentials()
+    entries = pool.entries()
+    assert len(entries) == 1
+    entry = entries[0]
+    assert entry.access_token == fresh
+    assert entry.refresh_token == "rt-1"
+    assert entry.source == "loopback_pkce"
+    assert entry.base_url == DEFAULT_XAI_OAUTH_BASE_URL
+
+
+def test_credential_pool_does_not_seed_when_singleton_missing_access_token(tmp_path, monkeypatch):
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    auth_store = {
+        "version": 1,
+        "providers": {
+            "xai-oauth": {
+                "tokens": {"access_token": "", "refresh_token": "rt"},
+                "auth_mode": "oauth_pkce",
+            }
+        },
+    }
+    (hermes_home / "auth.json").write_text(json.dumps(auth_store))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    pool = load_pool("xai-oauth")
+    assert not pool.has_credentials()
+
+
+def test_credential_pool_seed_respects_suppression(tmp_path, monkeypatch):
+    """`hermes auth remove xai-oauth <N>` for the seeded entry suppresses
+    further re-seeding so the removal is stable across load_pool calls."""
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Suppress the source — mimic `hermes auth remove`.
+    from hermes_cli.auth import suppress_credential_source
+
+    suppress_credential_source("xai-oauth", "loopback_pkce")
+
+    pool = load_pool("xai-oauth")
+    assert not pool.has_credentials()
+
+
+def test_auth_remove_xai_oauth_clears_singleton_and_sticks(tmp_path, monkeypatch):
+    """End-to-end regression: ``hermes auth remove xai-oauth 1`` for a
+    singleton-seeded entry must clear auth.json providers.xai-oauth AND
+    suppress further re-seeding — otherwise the next ``load_pool`` call
+    silently resurrects the entry from the still-present singleton, making
+    the user-facing removal a no-op (the entry reappears on the next
+    invocation with no warning).
+
+    The bug pre-fix: there was no RemovalStep registered for
+    (xai-oauth, loopback_pkce), so ``find_removal_step`` returned None
+    and ``auth_remove_command`` fell through to the "unregistered source —
+    nothing to clean up" branch. That branch is correct for ``manual``
+    entries (pool-only) but wrong for singleton-seeded loopback_pkce
+    entries (auth.json singleton survives the in-memory removal)."""
+    from agent.credential_pool import load_pool
+    from hermes_cli.auth_commands import auth_remove_command
+    from types import SimpleNamespace
+
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh, refresh_token="rt-1")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Confirm pre-state: pool sees the seeded entry, auth.json has the singleton.
+    pool = load_pool("xai-oauth")
+    assert pool.has_credentials()
+    raw = json.loads((hermes_home / "auth.json").read_text())
+    assert "xai-oauth" in raw.get("providers", {})
+
+    # Act: the user runs `hermes auth remove xai-oauth 1`.
+    auth_remove_command(SimpleNamespace(provider="xai-oauth", target="1"))
+
+    # Post-state: auth.json singleton must be cleared so a re-seed has
+    # nothing to import.
+    raw_after = json.loads((hermes_home / "auth.json").read_text())
+    assert "xai-oauth" not in raw_after.get("providers", {}), (
+        "auth.json providers.xai-oauth must be cleared — otherwise the "
+        "next load_pool() reseeds the removed entry from the surviving "
+        "singleton, silently undoing the user's removal."
+    )
+
+    # And the next load must not reseed the entry from anywhere.
+    pool_after = load_pool("xai-oauth")
+    assert not pool_after.has_credentials(), (
+        "Removal must stick across load_pool() calls — without the "
+        "loopback_pkce RemovalStep, the seed function reads the singleton "
+        "and rebuilds the entry on every Hermes invocation."
+    )
+
+
+# ---------------------------------------------------------------------------
+# Pool sync-back to singleton after refresh
+# ---------------------------------------------------------------------------
+
+
+def test_pool_sync_back_writes_to_singleton(tmp_path, monkeypatch):
+    """When the pool refreshes a singleton-seeded xAI entry, the new tokens
+    must be written back to providers["xai-oauth"] so that
+    resolve_xai_oauth_runtime_credentials() (which reads the singleton)
+    doesn't keep using the consumed refresh token."""
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    expired = _jwt_with_exp(int(time.time()) - 10)
+    _setup_hermes_auth(hermes_home, access_token=expired, refresh_token="rt-old")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        assert refresh_token == "rt-old"
+        return {
+            "access_token": new_access,
+            "refresh_token": "rt-new",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "last_refresh": "2026-05-15T01:00:00Z",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    pool = load_pool("xai-oauth")
+    selected = pool.select()
+    assert selected is not None
+    assert selected.access_token == new_access
+    assert selected.refresh_token == "rt-new"
+
+    # Singleton must reflect refreshed tokens — otherwise the next process
+    # to load credentials would re-seed the consumed refresh token.
+    auth_path = hermes_home / "auth.json"
+    raw = json.loads(auth_path.read_text())
+    state = raw["providers"]["xai-oauth"]
+    assert state["tokens"]["access_token"] == new_access
+    assert state["tokens"]["refresh_token"] == "rt-new"
+    assert state["last_refresh"] == "2026-05-15T01:00:00Z"
+
+
+# ---------------------------------------------------------------------------
+# Runtime provider routing
+# ---------------------------------------------------------------------------
+
+
+def test_runtime_provider_uses_pool_entry_for_xai_oauth(tmp_path, monkeypatch):
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False)
+    monkeypatch.delenv("XAI_BASE_URL", raising=False)
+
+    runtime = resolve_runtime_provider(requested="xai-oauth")
+    assert runtime["provider"] == "xai-oauth"
+    assert runtime["api_mode"] == "codex_responses"
+    assert runtime["api_key"] == fresh
+    assert runtime["base_url"] == DEFAULT_XAI_OAUTH_BASE_URL
+
+
+def test_runtime_provider_default_base_url_when_pool_entry_missing_url(tmp_path, monkeypatch):
+    """Edge case: a pool entry that somehow has an empty base_url should still
+    surface the default xAI inference base URL instead of an empty string."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False)
+    monkeypatch.delenv("XAI_BASE_URL", raising=False)
+
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    pool = load_pool("xai-oauth")
+    pool.add_entry(
+        PooledCredential(
+            provider="xai-oauth",
+            id=uuid.uuid4().hex[:6],
+            label="test",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source="manual:xai_pkce",
+            access_token=fresh,
+            refresh_token="rt",
+            base_url="",
+        )
+    )
+
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+
+    runtime = resolve_runtime_provider(requested="xai-oauth")
+    assert runtime["provider"] == "xai-oauth"
+    assert runtime["api_mode"] == "codex_responses"
+    assert runtime["api_key"] == fresh
+    assert runtime["base_url"] == DEFAULT_XAI_OAUTH_BASE_URL
+
+
+# ---------------------------------------------------------------------------
+# Token-expiry behavior on the pool path
+# ---------------------------------------------------------------------------
+
+
+def test_pool_entry_needs_refresh_when_jwt_within_skew(tmp_path, monkeypatch):
+    """The pool's proactive-refresh gate must trigger when the JWT exp claim
+    is within the XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS window — otherwise a
+    near-expired token will hit the API and 401 unnecessarily.  Mirrors the
+    Codex skew-window behavior."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    from hermes_cli.auth import XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Token expires in 30s — well inside the 120s skew window.
+    near_expiry = _jwt_with_exp(int(time.time()) + 30)
+    pool = load_pool("xai-oauth")
+    entry = PooledCredential(
+        provider="xai-oauth",
+        id=uuid.uuid4().hex[:6],
+        label="test",
+        auth_type=AUTH_TYPE_OAUTH,
+        priority=0,
+        source="manual:xai_pkce",
+        access_token=near_expiry,
+        refresh_token="rt",
+        base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+    )
+    pool.add_entry(entry)
+    assert XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS > 30
+    assert pool._entry_needs_refresh(entry) is True
+
+
+def test_pool_entry_no_refresh_for_fresh_jwt(tmp_path, monkeypatch):
+    """A fresh JWT beyond the skew window must NOT trigger proactive refresh."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    pool = load_pool("xai-oauth")
+    entry = PooledCredential(
+        provider="xai-oauth",
+        id=uuid.uuid4().hex[:6],
+        label="test",
+        auth_type=AUTH_TYPE_OAUTH,
+        priority=0,
+        source="manual:xai_pkce",
+        access_token=fresh,
+        refresh_token="rt",
+        base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+    )
+    pool.add_entry(entry)
+    assert pool._entry_needs_refresh(entry) is False
+
+
+def test_pool_select_proactively_refreshes_expiring_token(tmp_path, monkeypatch):
+    """End-to-end: pool.select() with refresh=True on an expiring entry must
+    return the refreshed token.  This is the proactive path that runs BEFORE
+    the API call — separate from the 401-reactive path."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    near_expiry = _jwt_with_exp(int(time.time()) + 30)
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+
+    refresh_calls = {"count": 0}
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        refresh_calls["count"] += 1
+        assert refresh_token == "rt-old"
+        return {
+            "access_token": new_access,
+            "refresh_token": "rt-new",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "last_refresh": "2026-05-15T01:00:00Z",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    pool = load_pool("xai-oauth")
+    pool.add_entry(
+        PooledCredential(
+            provider="xai-oauth",
+            id=uuid.uuid4().hex[:6],
+            label="test",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source="manual:xai_pkce",
+            access_token=near_expiry,
+            refresh_token="rt-old",
+            base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+        )
+    )
+
+    selected = pool.select()
+    assert refresh_calls["count"] == 1
+    assert selected is not None
+    assert selected.access_token == new_access
+    assert selected.refresh_token == "rt-new"
+
+
+def test_pool_try_refresh_current_handles_xai_oauth(tmp_path, monkeypatch):
+    """The reactive 401-recovery path uses pool.try_refresh_current().  This
+    must work for xai-oauth alongside openai-codex — otherwise mid-call
+    expirations get propagated as hard failures instead of being retried with
+    fresh tokens."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Even a "fresh-looking" token gets force-refreshed via try_refresh_current.
+    # We simulate the scenario where the server rejected the token (401)
+    # despite client-side expiry math saying it's still valid (e.g. clock
+    # skew, server-side revocation, token bound to a session that expired).
+    seemingly_fresh = _jwt_with_exp(int(time.time()) + 3600)
+    new_access = _jwt_with_exp(int(time.time()) + 7200)
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        return {
+            "access_token": new_access,
+            "refresh_token": "rt-rotated",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "last_refresh": "2026-05-15T02:00:00Z",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    pool = load_pool("xai-oauth")
+    pool.add_entry(
+        PooledCredential(
+            provider="xai-oauth",
+            id=uuid.uuid4().hex[:6],
+            label="test",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source="manual:xai_pkce",
+            access_token=seemingly_fresh,
+            refresh_token="rt-old",
+            base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+        )
+    )
+    pool.select()
+    refreshed = pool.try_refresh_current()
+    assert refreshed is not None
+    assert refreshed.access_token == new_access
+    assert refreshed.refresh_token == "rt-rotated"
+
+
+def test_pool_refresh_marks_entry_exhausted_on_failure(tmp_path, monkeypatch):
+    """When the xAI refresh endpoint rejects the refresh_token (e.g. consumed
+    by another process, revoked), the pool must surface the failure cleanly
+    rather than silently retaining stale tokens.  This is critical for the
+    failover path — _recover_with_credential_pool rotates to the next entry
+    only if try_refresh_current returns None."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    from hermes_cli.auth import AuthError
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    def _fake_refresh_fail(*args, **kwargs):
+        raise AuthError("refresh_token_reused", code="xai_refresh_failed", relogin_required=True)
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh_fail)
+
+    pool = load_pool("xai-oauth")
+    seemingly_fresh = _jwt_with_exp(int(time.time()) + 3600)
+    pool.add_entry(
+        PooledCredential(
+            provider="xai-oauth",
+            id=uuid.uuid4().hex[:6],
+            label="test",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source="manual:xai_pkce",
+            access_token=seemingly_fresh,
+            refresh_token="rt-revoked",
+            base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+        )
+    )
+    pool.select()
+    refreshed = pool.try_refresh_current()
+    # Refresh failure must return None so the caller falls through to
+    # credential rotation / friendly error display.
+    assert refreshed is None
+
+
+def test_pool_seeded_entry_sync_back_after_refresh(tmp_path, monkeypatch):
+    """When an entry seeded from the singleton (source='loopback_pkce')
+    is refreshed by the pool, the new tokens must be written back so a
+    fresh process load doesn't re-seed the now-consumed refresh token."""
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    near_expiry = _jwt_with_exp(int(time.time()) + 30)
+    _setup_hermes_auth(hermes_home, access_token=near_expiry, refresh_token="rt-singleton")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        assert refresh_token == "rt-singleton"
+        return {
+            "access_token": new_access,
+            "refresh_token": "rt-rotated",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "last_refresh": "2026-05-15T03:00:00Z",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    pool = load_pool("xai-oauth")
+    selected = pool.select()
+    assert selected is not None
+    assert selected.access_token == new_access
+
+    raw = json.loads((hermes_home / "auth.json").read_text())
+    tokens = raw["providers"]["xai-oauth"]["tokens"]
+    assert tokens["access_token"] == new_access
+    assert tokens["refresh_token"] == "rt-rotated"
+
+
+def test_pool_refresh_adopts_singleton_tokens_when_consumed_elsewhere(tmp_path, monkeypatch):
+    """Multi-process race: another Hermes process refreshed the singleton
+    (rotating the refresh_token) while this process held a stale in-memory
+    pool entry.  ``_refresh_entry`` must adopt the fresher singleton tokens
+    BEFORE spending its own (now-consumed) refresh_token, otherwise the
+    refresh POST would replay the consumed token and fail with
+    ``refresh_token_reused``.
+
+    Mirrors the proactive sync codex/nous already perform for the same
+    reason, and is what makes the pool actually safe to share across
+    profiles + Hermes processes."""
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    in_memory_at = _jwt_with_exp(int(time.time()) + 30)  # near-expiry
+    _setup_hermes_auth(hermes_home, access_token=in_memory_at, refresh_token="rt-stale")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Load the pool once so the in-memory entry is seeded with rt-stale.
+    pool = load_pool("xai-oauth")
+
+    # Now simulate "another process refreshed the tokens" by overwriting
+    # the singleton on disk WITHOUT touching this process's pool object.
+    other_process_at = _jwt_with_exp(int(time.time()) + 3600)
+    raw = json.loads((hermes_home / "auth.json").read_text())
+    raw["providers"]["xai-oauth"]["tokens"] = {
+        "access_token": other_process_at,
+        "refresh_token": "rt-rotated-by-other-process",
+        "id_token": "",
+        "expires_in": 3600,
+        "token_type": "Bearer",
+    }
+    (hermes_home / "auth.json").write_text(json.dumps(raw))
+
+    refresh_calls = {"refresh_token_seen": None}
+    final_at = _jwt_with_exp(int(time.time()) + 7200)
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        # The pool MUST have adopted the rotated token from auth.json before
+        # POSTing the refresh — otherwise it would replay the stale one.
+        refresh_calls["refresh_token_seen"] = refresh_token
+        return {
+            "access_token": final_at,
+            "refresh_token": "rt-final",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "last_refresh": "2026-05-15T05:00:00Z",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    selected = pool.select()
+    assert selected is not None
+    assert refresh_calls["refresh_token_seen"] == "rt-rotated-by-other-process"
+    assert selected.access_token == final_at
+
+
+def test_pool_refresh_recovers_when_other_process_already_refreshed(tmp_path, monkeypatch):
+    """Variant of the multi-process race where the other process refreshes
+    BETWEEN our proactive sync and the HTTP POST.  Our refresh fails with a
+    consumed-token error; we must re-check auth.json, find the fresh pair
+    (written by the racing process), and adopt it instead of marking the
+    entry exhausted."""
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    in_memory_at = _jwt_with_exp(int(time.time()) + 30)
+    _setup_hermes_auth(hermes_home, access_token=in_memory_at, refresh_token="rt-shared")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    pool = load_pool("xai-oauth")
+
+    other_process_at = _jwt_with_exp(int(time.time()) + 3600)
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        # Simulate the racing process winning at the auth server right
+        # before our POST: by the time we reach this call, auth.json
+        # already holds the fresher pair, but we POSTed with rt-shared.
+        raw = json.loads((hermes_home / "auth.json").read_text())
+        raw["providers"]["xai-oauth"]["tokens"] = {
+            "access_token": other_process_at,
+            "refresh_token": "rt-rotated",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+        }
+        (hermes_home / "auth.json").write_text(json.dumps(raw))
+        raise AuthError(
+            "refresh_token_reused",
+            provider="xai-oauth",
+            code="xai_refresh_failed",
+            relogin_required=True,
+        )
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    selected = pool.select()
+    # Even though refresh_xai_oauth_pure raised, the post-failure
+    # recovery path should adopt the fresher singleton tokens.
+    assert selected is not None
+    assert selected.access_token == other_process_at
+    assert selected.refresh_token == "rt-rotated"
+
+
+def test_pool_exhausted_xai_entry_recovers_after_singleton_refresh(tmp_path, monkeypatch):
+    """When a singleton-seeded entry is parked as STATUS_EXHAUSTED and the
+    user runs ``hermes model`` -> xAI Grok OAuth (or another process
+    refreshes), the next ``_available_entries`` pass must adopt the fresh
+    auth.json tokens instead of leaving the entry frozen until the
+    cooldown elapses.  Mirrors the codex/nous self-heal pattern."""
+    from agent.credential_pool import load_pool, STATUS_EXHAUSTED
+    from dataclasses import replace
+
+    hermes_home = tmp_path / "hermes"
+    stale_at = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=stale_at, refresh_token="rt-stale")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    pool = load_pool("xai-oauth")
+    seeded = pool.entries()[0]
+    assert seeded.source == "loopback_pkce"
+
+    # Park the seeded entry as exhausted with a far-future cooldown so
+    # without resync it would never be selectable.
+    exhausted = replace(
+        seeded,
+        last_status=STATUS_EXHAUSTED,
+        last_status_at=time.time(),
+        last_error_code=401,
+        last_error_reset_at=time.time() + 3600,  # 1h cooldown
+    )
+    pool._replace_entry(seeded, exhausted)
+    pool._persist()
+    assert pool.has_credentials()
+    assert not pool.has_available()  # cooldown blocks everything
+
+    # Simulate the user re-running `hermes model` -> xAI Grok OAuth: the
+    # singleton now has fresh tokens.
+    fresh_at = _jwt_with_exp(int(time.time()) + 7200)
+    raw = json.loads((hermes_home / "auth.json").read_text())
+    raw["providers"]["xai-oauth"]["tokens"] = {
+        "access_token": fresh_at,
+        "refresh_token": "rt-fresh",
+        "id_token": "",
+        "expires_in": 3600,
+        "token_type": "Bearer",
+    }
+    (hermes_home / "auth.json").write_text(json.dumps(raw))
+
+    # _available_entries must sync from the singleton, lifting the
+    # exhausted state for the seeded entry.
+    available = pool._available_entries(clear_expired=True, refresh=False)
+    assert len(available) == 1
+    assert available[0].access_token == fresh_at
+    assert available[0].refresh_token == "rt-fresh"
+    assert available[0].last_status != STATUS_EXHAUSTED
+
+
+def test_pool_manual_xai_entry_not_synced_from_singleton(tmp_path, monkeypatch):
+    """Sync from the singleton must apply ONLY to the singleton-seeded
+    entry (source='loopback_pkce').  Manually added entries (e.g. via
+    ``hermes auth add xai-oauth``) own their own refresh-token lifecycle
+    and must not be silently overwritten when the user logs in via
+    ``hermes model``."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    singleton_at = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=singleton_at, refresh_token="rt-singleton")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    pool = load_pool("xai-oauth")
+
+    manual_at_old = _jwt_with_exp(int(time.time()) + 30)
+    pool.add_entry(
+        PooledCredential(
+            provider="xai-oauth",
+            id=uuid.uuid4().hex[:6],
+            label="manual",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=1,
+            source="manual:xai_pkce",
+            access_token=manual_at_old,
+            refresh_token="rt-manual",
+            base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+        )
+    )
+    manual_entry = next(e for e in pool.entries() if e.source == "manual:xai_pkce")
+    synced = pool._sync_xai_oauth_entry_from_auth_store(manual_entry)
+    # Same object — no sync happened.
+    assert synced is manual_entry
+    assert synced.access_token == manual_at_old
+    assert synced.refresh_token == "rt-manual"
+
+
+def test_pool_manual_entry_does_not_sync_back_to_singleton(tmp_path, monkeypatch):
+    """`hermes auth add xai-oauth` entries (source='manual:xai_pkce') are
+    independent credentials and must NOT write to the singleton.  Sync-back
+    is restricted to entries seeded from the singleton.  Otherwise adding a
+    second pool credential would silently overwrite the user's main login."""
+    from agent.credential_pool import load_pool, AUTH_TYPE_OAUTH, PooledCredential
+    import uuid
+
+    hermes_home = tmp_path / "hermes"
+    # Singleton has its own tokens (separate login).
+    singleton_at = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=singleton_at, refresh_token="rt-singleton")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    manual_at_old = _jwt_with_exp(int(time.time()) + 30)
+    manual_at_new = _jwt_with_exp(int(time.time()) + 7200)
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        assert refresh_token == "rt-manual"
+        return {
+            "access_token": manual_at_new,
+            "refresh_token": "rt-manual-new",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "last_refresh": "2026-05-15T04:00:00Z",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    pool = load_pool("xai-oauth")
+    pool.add_entry(
+        PooledCredential(
+            provider="xai-oauth",
+            id=uuid.uuid4().hex[:6],
+            label="manual",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source="manual:xai_pkce",
+            access_token=manual_at_old,
+            refresh_token="rt-manual",
+            base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+        )
+    )
+    # Refresh the manual entry — singleton must be left alone.
+    manual_entries = [e for e in pool.entries() if e.source == "manual:xai_pkce"]
+    assert len(manual_entries) == 1
+    pool._refresh_entry(manual_entries[0], force=True)
+
+    raw = json.loads((hermes_home / "auth.json").read_text())
+    tokens = raw["providers"]["xai-oauth"]["tokens"]
+    # Singleton must be untouched — manual refresh shouldn't leak across.
+    assert tokens["access_token"] == singleton_at
+    assert tokens["refresh_token"] == "rt-singleton"
+
+
+# ---------------------------------------------------------------------------
+# Auxiliary client routing
+# ---------------------------------------------------------------------------
+
+
+def test_auxiliary_client_routes_xai_oauth_through_responses_api(tmp_path, monkeypatch):
+    """Without explicit xai-oauth handling in ``resolve_provider_client``, an
+    xai-oauth main provider falls through to the generic ``oauth_external``
+    arm and returns ``(None, None)`` — silently re-routing every auxiliary
+    task (compression, curator, web extract, session search, ...) to
+    whatever Step-2 fallback chain the user has configured (OpenRouter,
+    Nous, etc.).  Users on xAI Grok OAuth would then see surprise charges
+    on those side providers for side tasks they thought were running on
+    their xAI subscription.
+
+    Pin the routing contract: ``resolve_provider_client("xai-oauth", model)``
+    must return a non-None client wrapping the xAI Responses API."""
+    from agent.auxiliary_client import (
+        CodexAuxiliaryClient,
+        resolve_provider_client,
+    )
+
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False)
+    monkeypatch.delenv("XAI_BASE_URL", raising=False)
+
+    client, model = resolve_provider_client("xai-oauth", model="grok-4")
+    assert client is not None, (
+        "xai-oauth must route to a Responses-API client; falling through to "
+        "the generic oauth_external branch silently swaps providers for "
+        "every auxiliary task."
+    )
+    assert isinstance(client, CodexAuxiliaryClient)
+    assert model == "grok-4"
+    # The wrapper preserves base_url + api_key so async wrappers and cache
+    # eviction can introspect them.  Pin both to the live xAI runtime.
+    assert str(client.base_url).rstrip("/") == DEFAULT_XAI_OAUTH_BASE_URL
+    assert client.api_key == fresh
+
+
+def test_auxiliary_client_xai_oauth_returns_none_when_unauthenticated(tmp_path, monkeypatch):
+    """No xAI OAuth tokens in the auth store → ``resolve_provider_client``
+    must return ``(None, None)`` so ``_resolve_auto`` falls through to the
+    next provider in the chain instead of crashing or constructing a
+    misconfigured client."""
+    from agent.auxiliary_client import resolve_provider_client
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    client, model = resolve_provider_client("xai-oauth", model="grok-4")
+    assert client is None
+    assert model is None
+
+
+def test_auxiliary_client_xai_oauth_requires_explicit_model(tmp_path, monkeypatch):
+    """xAI's Responses API has no safe "cheap aux model" default —
+    pinning one would silently rot the same way Codex's did.  Callers
+    must pass an explicit model (auxiliary.<task>.model in config.yaml)."""
+    from agent.auxiliary_client import resolve_provider_client
+
+    hermes_home = tmp_path / "hermes"
+    fresh = _jwt_with_exp(int(time.time()) + 3600)
+    _setup_hermes_auth(hermes_home, access_token=fresh)
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    client, model = resolve_provider_client("xai-oauth", model=None)
+    assert client is None
+    assert model is None
+
+
+# ---------------------------------------------------------------------------
+# active_provider preservation on pool sync-back
+# ---------------------------------------------------------------------------
+
+
+def test_pool_sync_back_preserves_active_provider(tmp_path, monkeypatch):
+    """A token-rotation sync-back is a side effect of refresh, not the user
+    picking a provider.  ``_save_provider_state`` flips ``active_provider``;
+    using it on the sync-back path means every xAI/Codex/Nous refresh in a
+    multi-provider setup silently overrides the user's chosen active
+    provider (visible to ``hermes auth status``, ``hermes setup``, and the
+    ``hermes`` no-arg dispatcher).  Pin the ``set_active=False`` contract so
+    no future refactor regresses to the legacy semantic."""
+    from agent.credential_pool import load_pool
+
+    hermes_home = tmp_path / "hermes"
+    near_expiry = _jwt_with_exp(int(time.time()) + 30)
+    _setup_hermes_auth(hermes_home, access_token=near_expiry, refresh_token="rt-xai")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Simulate a multi-provider user whose actual chosen provider is
+    # OpenRouter — xai-oauth tokens exist in the singleton but are NOT
+    # the active provider.
+    raw = json.loads((hermes_home / "auth.json").read_text())
+    raw["active_provider"] = "openrouter"
+    (hermes_home / "auth.json").write_text(json.dumps(raw))
+
+    new_access = _jwt_with_exp(int(time.time()) + 3600)
+
+    def _fake_refresh(access_token, refresh_token, **kwargs):
+        return {
+            "access_token": new_access,
+            "refresh_token": "rt-rotated",
+            "id_token": "",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "last_refresh": "2026-05-15T10:00:00Z",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _fake_refresh)
+
+    pool = load_pool("xai-oauth")
+    selected = pool.select()
+    assert selected is not None
+    assert selected.access_token == new_access
+
+    # The refresh wrote new tokens back into the singleton — the user's
+    # prior ``active_provider`` choice (openrouter) MUST survive.
+    raw_after = json.loads((hermes_home / "auth.json").read_text())
+    assert raw_after["active_provider"] == "openrouter", (
+        "pool sync-back must not flip active_provider; otherwise xAI/Codex/"
+        "Nous token rotations silently take over multi-provider users' "
+        "auth.json `active_provider` flag."
+    )
+    # Tokens were actually written so the next process won't replay the
+    # consumed refresh_token (preserves the original sync-back fix).
+    state = raw_after["providers"]["xai-oauth"]["tokens"]
+    assert state["access_token"] == new_access
+    assert state["refresh_token"] == "rt-rotated"
diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py
index b5cfdf16a9b..88ce31813e4 100644
--- a/tests/plugins/image_gen/test_xai_provider.py
+++ b/tests/plugins/image_gen/test_xai_provider.py
@@ -72,10 +72,13 @@ class TestXAIImageGenProvider:
 
         provider = XAIImageGenProvider()
         schema = provider.get_setup_schema()
-        assert schema["name"] == "xAI (Grok)"
+        assert schema["name"] == "xAI Grok Imagine (image)"
         assert schema["badge"] == "paid"
-        assert len(schema["env_vars"]) == 1
-        assert schema["env_vars"][0]["key"] == "XAI_API_KEY"
+        # Auth resolution is delegated to the shared "xai_grok" post_setup
+        # hook so the picker doesn't blindly prompt for XAI_API_KEY when the
+        # user is already signed in via xAI Grok OAuth.
+        assert schema["env_vars"] == []
+        assert schema["post_setup"] == "xai_grok"
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/plugins/video_gen/test_xai_plugin.py b/tests/plugins/video_gen/test_xai_plugin.py
index 25695d852e5..bd7a880fdee 100644
--- a/tests/plugins/video_gen/test_xai_plugin.py
+++ b/tests/plugins/video_gen/test_xai_plugin.py
@@ -54,6 +54,50 @@ def test_xai_generate_requires_xai_key(monkeypatch):
     assert result["error_type"] == "auth_required"
 
 
+def test_xai_available_with_oauth_only(monkeypatch):
+    """The plugin must honour xAI Grok OAuth credentials, not just
+    XAI_API_KEY. Otherwise the agent's tool-availability check filters
+    ``video_generate`` out of the toolbelt and the agent silently falls
+    back to whatever skill advertises video generation (e.g. comfyui).
+    """
+    import plugins.video_gen.xai as xai_plugin
+
+    monkeypatch.delenv("XAI_API_KEY", raising=False)
+    monkeypatch.setattr(
+        "tools.xai_http.resolve_xai_http_credentials",
+        lambda: {
+            "provider": "xai-oauth",
+            "api_key": "oauth-bearer-token",
+            "base_url": "https://api.x.ai/v1",
+        },
+    )
+
+    assert xai_plugin.XAIVideoGenProvider().is_available() is True
+
+
+def test_xai_resolved_credentials_threaded_through_request(monkeypatch):
+    """OAuth-resolved creds must reach the HTTP layer — bug class where
+    ``is_available()`` says yes but the request still hits with no key.
+    """
+    import plugins.video_gen.xai as xai_plugin
+
+    monkeypatch.delenv("XAI_API_KEY", raising=False)
+    monkeypatch.setattr(
+        "tools.xai_http.resolve_xai_http_credentials",
+        lambda: {
+            "provider": "xai-oauth",
+            "api_key": "oauth-bearer-token",
+            "base_url": "https://api.x.ai/v1",
+        },
+    )
+
+    api_key, base_url = xai_plugin._resolve_xai_credentials()
+    assert api_key == "oauth-bearer-token"
+    assert base_url == "https://api.x.ai/v1"
+    headers = xai_plugin._xai_headers(api_key)
+    assert headers["Authorization"] == "Bearer oauth-bearer-token"
+
+
 def test_xai_no_operation_kwarg():
     """The ABC's generate() signature no longer accepts 'operation'.
     Passing it through **kwargs should be ignored (forward-compat)."""
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 47c491c441c..8cc02629523 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -578,6 +578,197 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
     assert result["final_response"] == "Recovered after refresh"
 
 
+def _build_xai_oauth_agent(monkeypatch):
+    _patch_agent_bootstrap(monkeypatch)
+    agent = run_agent.AIAgent(
+        model="grok-code-fast-1",
+        provider="xai-oauth",
+        api_mode="codex_responses",
+        base_url="https://api.x.ai/v1",
+        api_key="xai-oauth-token",
+        quiet_mode=True,
+        max_iterations=4,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    agent._cleanup_task_resources = lambda task_id: None
+    agent._persist_session = lambda messages, history=None: None
+    agent._save_trajectory = lambda messages, user_message, completed: None
+    agent._save_session_log = lambda messages: None
+    return agent
+
+
+def test_build_api_kwargs_xai_oauth_sends_cache_key_via_extra_body(monkeypatch):
+    """xai-oauth + codex_responses must route prompt caching via the
+    ``prompt_cache_key`` body field on /v1/responses (xAI's documented
+    Responses-API cache key — see docs.x.ai prompt-caching/maximizing-
+    cache-hits).
+
+    We pass it through ``extra_body`` rather than as a top-level kwarg so
+    the body field is serialized into JSON regardless of whether the
+    installed openai SDK build still accepts ``prompt_cache_key`` on
+    ``Responses.stream()``. Older or trimmed SDK builds drop it from the
+    signature and would otherwise raise ``TypeError`` before the request
+    reaches api.x.ai. The ``x-grok-conv-id`` header is retained as a
+    belt-and-braces fallback for clients/proxies that route on headers."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    kwargs = agent._build_api_kwargs(
+        [
+            {"role": "system", "content": "You are Hermes."},
+            {"role": "user", "content": "Ping"},
+        ]
+    )
+
+    assert kwargs.get("model") == "grok-code-fast-1"
+    # Top-level kwarg must NOT be set — that's the openai SDK
+    # incompatibility this whole indirection exists to dodge.
+    assert "prompt_cache_key" not in kwargs
+    extra_body = kwargs.get("extra_body") or {}
+    assert extra_body.get("prompt_cache_key"), (
+        "xAI prompt-cache routing must travel via extra_body.prompt_cache_key "
+        "for /v1/responses — body field is the documented surface."
+    )
+    headers = kwargs.get("extra_headers") or {}
+    assert "x-grok-conv-id" in headers, (
+        "x-grok-conv-id header kept as belt-and-braces fallback for clients "
+        "that route on headers."
+    )
+
+
+def test_run_conversation_xai_oauth_refreshes_after_401_and_retries(monkeypatch):
+    """xai-oauth speaks the Responses API just like codex.  When the access
+    token is rejected mid-call (401), the same proactive refresh-and-retry
+    handler that fires for openai-codex must also fire for xai-oauth — the
+    bug it caught: the gating condition checked only ``provider == "openai-codex"``,
+    so xai-oauth 401s leaked straight to non-retryable abort path with no
+    chance to swap in a freshly refreshed access token."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    calls = {"api": 0, "refresh": 0}
+
+    class _UnauthorizedError(RuntimeError):
+        def __init__(self):
+            super().__init__("Error code: 401 - unauthorized")
+            self.status_code = 401
+
+    def _fake_api_call(api_kwargs):
+        calls["api"] += 1
+        if calls["api"] == 1:
+            raise _UnauthorizedError()
+        return _codex_message_response("Recovered after xAI refresh")
+
+    def _fake_refresh(*, force=True):
+        calls["refresh"] += 1
+        assert force is True
+        return True
+
+    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
+    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)
+
+    result = agent.run_conversation("Say OK")
+
+    assert calls["api"] == 2
+    assert calls["refresh"] == 1
+    assert result["completed"] is True
+    assert result["final_response"] == "Recovered after xAI refresh"
+
+
+def test_try_refresh_codex_client_credentials_handles_xai_oauth(monkeypatch):
+    """``_try_refresh_codex_client_credentials`` must rebuild the OpenAI
+    client with freshly resolved xAI OAuth credentials when the active
+    provider is xai-oauth.  The function name is shared between codex and
+    xai-oauth (both speak codex_responses) — covering both cases prevents
+    silent regressions where the function gets gated to a single provider."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    closed = {"value": False}
+    rebuilt = {"kwargs": None}
+
+    class _ExistingClient:
+        def close(self):
+            closed["value"] = True
+
+    class _RebuiltClient:
+        pass
+
+    def _fake_openai(**kwargs):
+        rebuilt["kwargs"] = kwargs
+        return _RebuiltClient()
+
+    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
+        # The pre-refresh guard reads the singleton with refresh_if_expiring=False
+        # to verify that the agent's active key still matches; the actual
+        # refresh later passes force_refresh=True.  Both calls must succeed.
+        return {
+            "api_key": "fresh-xai-token" if force_refresh else agent.api_key,
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
+        _fake_resolve,
+    )
+    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
+
+    agent.client = _ExistingClient()
+    ok = agent._try_refresh_codex_client_credentials(force=True)
+
+    assert ok is True
+    assert closed["value"] is True
+    assert rebuilt["kwargs"]["api_key"] == "fresh-xai-token"
+    assert rebuilt["kwargs"]["base_url"] == "https://api.x.ai/v1"
+    assert isinstance(agent.client, _RebuiltClient)
+    assert agent.api_key == "fresh-xai-token"
+
+
+def test_try_refresh_codex_client_credentials_skips_xai_oauth_when_singleton_differs(monkeypatch):
+    """An xai-oauth agent constructed with a non-singleton credential
+    (e.g. a manual pool entry whose tokens belong to a different account
+    than the loopback_pkce singleton, or an explicit ``api_key=`` arg)
+    MUST NOT silently adopt the singleton's tokens on a 401 reactive
+    refresh.  Otherwise a 401 mid-conversation would re-route the rest
+    of the conversation onto a different account, with no user feedback.
+
+    The credential pool's reactive recovery is the right channel for
+    pool-managed credentials; this fallback path is for the singleton-
+    only case and must short-circuit when the active key differs."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    # Agent is using "xai-oauth-token" (per the builder); singleton holds
+    # a *different* account's token.  No force_refresh should fire.
+    refresh_calls = {"count": 0}
+
+    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
+        if force_refresh:
+            refresh_calls["count"] += 1
+            return {
+                "api_key": "singleton-account-token",
+                "base_url": "https://api.x.ai/v1",
+            }
+        # The pre-refresh guard read — return the singleton's view of the
+        # singleton's token, which is NOT what the agent is currently using.
+        return {
+            "api_key": "singleton-account-token",
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
+        _fake_resolve,
+    )
+
+    pre_refresh_key = agent.api_key
+    ok = agent._try_refresh_codex_client_credentials(force=True)
+
+    assert ok is False, (
+        "must not refresh when the active credential isn't the singleton; "
+        "otherwise the conversation silently swaps accounts mid-flight."
+    )
+    assert refresh_calls["count"] == 0, (
+        "force_refresh must not run — that would mutate the singleton's "
+        "tokens on disk and consume its single-use refresh_token for an "
+        "agent that wasn't even using the singleton."
+    )
+    assert agent.api_key == pre_refresh_key
+
+
 def test_run_conversation_copilot_refreshes_after_401_and_retries(monkeypatch):
     agent = _build_copilot_agent(monkeypatch)
     calls = {"api": 0, "refresh": 0}
@@ -624,12 +815,18 @@ def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
         rebuilt["kwargs"] = kwargs
         return _RebuiltClient()
 
+    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
+        # Pre-refresh guard reads the singleton (refresh_if_expiring=False).
+        # It must report the agent's current api_key so the equality check
+        # passes; only then does the actual force_refresh run.
+        return {
+            "api_key": "new-codex-token" if force_refresh else agent.api_key,
+            "base_url": "https://chatgpt.com/backend-api/codex",
+        }
+
     monkeypatch.setattr(
         "hermes_cli.auth.resolve_codex_runtime_credentials",
-        lambda force_refresh=True: {
-            "api_key": "new-codex-token",
-            "base_url": "https://chatgpt.com/backend-api/codex",
-        },
+        _fake_resolve,
     )
     monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
 
diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index 942fba01120..6f6d2f8c2a3 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -266,10 +266,12 @@ def _get_provider(stt_config: dict) -> str:
             return "none"
 
         if provider == "xai":
-            if get_env_value("XAI_API_KEY"):
+            from tools.xai_http import resolve_xai_http_credentials
+
+            if resolve_xai_http_credentials().get("api_key"):
                 return "xai"
             logger.warning(
-                "STT provider 'xai' configured but XAI_API_KEY not set"
+                "STT provider 'xai' configured but no xAI credentials are available"
             )
             return "none"
 
@@ -289,9 +291,14 @@ def _get_provider(stt_config: dict) -> str:
     if _HAS_OPENAI and _has_openai_audio_backend():
         logger.info("No local STT available, using OpenAI Whisper API")
         return "openai"
-    if get_env_value("XAI_API_KEY"):
-        logger.info("No local STT available, using xAI Grok STT API")
-        return "xai"
+    try:
+        from tools.xai_http import resolve_xai_http_credentials
+
+        if resolve_xai_http_credentials().get("api_key"):
+            logger.info("No local STT available, using xAI Grok STT API")
+            return "xai"
+    except Exception:
+        pass
     return "none"
 
 # ---------------------------------------------------------------------------
@@ -704,14 +711,22 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]:
     Supports Inverse Text Normalization, diarization, and word-level timestamps.
     Requires ``XAI_API_KEY`` environment variable.
     """
-    api_key = get_env_value("XAI_API_KEY")
+    from tools.xai_http import resolve_xai_http_credentials
+
+    creds = resolve_xai_http_credentials()
+    api_key = str(creds.get("api_key") or "").strip()
     if not api_key:
-        return {"success": False, "transcript": "", "error": "XAI_API_KEY not set"}
+        return {
+            "success": False,
+            "transcript": "",
+            "error": "No xAI credentials found. Configure xAI OAuth in `hermes model` or set XAI_API_KEY",
+        }
 
     stt_config = _load_stt_config()
     xai_config = stt_config.get("xai", {})
     base_url = str(
         xai_config.get("base_url")
+        or creds.get("base_url")
         or get_env_value("XAI_STT_BASE_URL")
         or XAI_STT_BASE_URL
     ).strip().rstrip("/")
@@ -872,7 +887,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
             "No STT provider available. Install faster-whisper for free local "
             f"transcription, configure {LOCAL_STT_COMMAND_ENV} or install a local whisper CLI, "
             "set GROQ_API_KEY for free Groq Whisper, set MISTRAL_API_KEY for Mistral "
-            "Voxtral Transcribe, set XAI_API_KEY for xAI Grok STT, or set VOICE_TOOLS_OPENAI_KEY "
+            "Voxtral Transcribe, configure xAI OAuth or set XAI_API_KEY for xAI Grok STT, or set VOICE_TOOLS_OPENAI_KEY "
             "or OPENAI_API_KEY for the OpenAI Whisper API."
         ),
     }
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 9f0d272dac0..57907f76833 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -9,7 +9,7 @@ Built-in TTS providers:
 - MiniMax TTS: High-quality with voice cloning, needs MINIMAX_API_KEY
 - Mistral (Voxtral TTS): Multilingual, native Opus, needs MISTRAL_API_KEY
 - Google Gemini TTS: Controllable, 30 prebuilt voices, needs GEMINI_API_KEY
-- xAI TTS: Grok voices, needs XAI_API_KEY
+- xAI TTS: Grok voices, uses xAI Grok OAuth credentials or XAI_API_KEY
 - NeuTTS (local, free, no API key): On-device TTS via neutts
 - KittenTTS (local, free, no API key): On-device 25MB model
 - Piper (local, free, no API key): OHF-Voice/piper1-gpl neural VITS, 44 languages
@@ -902,9 +902,12 @@ def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -
     """
     import requests
 
-    api_key = (get_env_value("XAI_API_KEY") or "").strip()
+    from tools.xai_http import resolve_xai_http_credentials
+
+    creds = resolve_xai_http_credentials()
+    api_key = str(creds.get("api_key") or "").strip()
     if not api_key:
-        raise ValueError("XAI_API_KEY not set. Get one at https://console.x.ai/")
+        raise ValueError("No xAI credentials found. Configure xAI OAuth in `hermes model` or set XAI_API_KEY.")
 
     xai_config = tts_config.get("xai", {})
     voice_id = str(xai_config.get("voice_id", DEFAULT_XAI_VOICE_ID)).strip() or DEFAULT_XAI_VOICE_ID
@@ -913,6 +916,7 @@ def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -
     bit_rate = int(xai_config.get("bit_rate", DEFAULT_XAI_BIT_RATE))
     base_url = str(
         xai_config.get("base_url")
+        or creds.get("base_url")
         or get_env_value("XAI_BASE_URL")
         or DEFAULT_XAI_BASE_URL
     ).strip().rstrip("/")
@@ -1917,8 +1921,13 @@ def check_tts_requirements() -> bool:
         pass
     if get_env_value("MINIMAX_API_KEY"):
         return True
-    if get_env_value("XAI_API_KEY"):
-        return True
+    try:
+        from tools.xai_http import resolve_xai_http_credentials
+
+        if resolve_xai_http_credentials().get("api_key"):
+            return True
+    except Exception:
+        pass
     if get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY"):
         return True
     try:
diff --git a/tools/xai_http.py b/tools/xai_http.py
index b5bce97c2f4..fbb7961d244 100644
--- a/tools/xai_http.py
+++ b/tools/xai_http.py
@@ -2,6 +2,9 @@
 
 from __future__ import annotations
 
+import os
+from typing import Dict
+
 
 def hermes_xai_user_agent() -> str:
     """Return a stable Hermes-specific User-Agent for xAI HTTP calls."""
@@ -10,3 +13,49 @@ def hermes_xai_user_agent() -> str:
     except Exception:
         __version__ = "unknown"
     return f"Hermes-Agent/{__version__}"
+
+
+def resolve_xai_http_credentials() -> Dict[str, str]:
+    """Resolve bearer credentials for direct xAI HTTP endpoints.
+
+    Prefers Hermes-managed xAI OAuth credentials when available, then falls back
+    to ``XAI_API_KEY`` from the environment. This keeps direct xAI endpoints
+    (images, TTS, STT, etc.) aligned with the main runtime auth model.
+    """
+    try:
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+
+        runtime = resolve_runtime_provider(requested="xai-oauth")
+        access_token = str(runtime.get("api_key") or "").strip()
+        base_url = str(runtime.get("base_url") or "").strip().rstrip("/")
+        if access_token:
+            return {
+                "provider": "xai-oauth",
+                "api_key": access_token,
+                "base_url": base_url or "https://api.x.ai/v1",
+            }
+    except Exception:
+        pass
+
+    try:
+        from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
+
+        creds = resolve_xai_oauth_runtime_credentials()
+        access_token = str(creds.get("api_key") or "").strip()
+        base_url = str(creds.get("base_url") or "").strip().rstrip("/")
+        if access_token:
+            return {
+                "provider": "xai-oauth",
+                "api_key": access_token,
+                "base_url": base_url or "https://api.x.ai/v1",
+            }
+    except Exception:
+        pass
+
+    api_key = os.getenv("XAI_API_KEY", "").strip()
+    base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
+    return {
+        "provider": "xai",
+        "api_key": api_key,
+        "base_url": base_url,
+    }
diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md
new file mode 100644
index 00000000000..49c7087621a
--- /dev/null
+++ b/website/docs/guides/xai-grok-oauth.md
@@ -0,0 +1,214 @@
+---
+sidebar_position: 16
+title: "xAI Grok OAuth (SuperGrok Subscription)"
+description: "Sign in with your SuperGrok subscription to use Grok models in Hermes Agent — no API key required"
+---
+
+# xAI Grok OAuth (SuperGrok Subscription)
+
+Hermes Agent supports xAI Grok through a browser-based OAuth login flow against [accounts.x.ai](https://accounts.x.ai), using your existing **SuperGrok subscription**. No `XAI_API_KEY` is required — log in once and Hermes automatically refreshes your session in the background.
+
+The transport reuses the `codex_responses` adapter (xAI exposes a Responses-style endpoint), so reasoning, tool-calling, streaming, and prompt caching work without any adapter changes.
+
+The same OAuth bearer token is also reused by every direct-to-xAI surface in Hermes — TTS, image generation, video generation, and transcription — so a single login covers all four.
+
+## Overview
+
+| Item | Value |
+|------|-------|
+| Provider ID | `xai-oauth` |
+| Display name | xAI Grok OAuth (SuperGrok Subscription) |
+| Auth type | Browser OAuth 2.0 PKCE (loopback callback) |
+| Transport | xAI Responses API (`codex_responses`) |
+| Default model | `grok-4.3` |
+| Endpoint | `https://api.x.ai/v1` |
+| Auth server | `https://accounts.x.ai` |
+| Requires env var | No (`XAI_API_KEY` is **not** used for this provider) |
+| Subscription | [SuperGrok](https://x.ai/grok) (any active tier) |
+
+## Prerequisites
+
+- Python 3.9+
+- Hermes Agent installed
+- An active SuperGrok subscription on your xAI account
+- A browser available on the local machine (or use `--no-browser` for remote sessions)
+
+## Quick Start
+
+```bash
+# Launch the provider and model picker
+hermes model
+# → Select "xAI Grok OAuth (SuperGrok Subscription)" from the provider list
+# → Hermes opens your browser to accounts.x.ai
+# → Approve access in the browser
+# → Pick a model (grok-4.3 is at the top)
+# → Start chatting
+
+hermes
+```
+
+After the first login, credentials are stored under `~/.hermes/auth.json` and refreshed automatically before they expire.
+
+## Logging In Manually
+
+You can trigger a login without going through the model picker:
+
+```bash
+hermes auth add xai-oauth
+```
+
+### Remote / headless sessions
+
+On servers, containers, or SSH sessions where no browser is available, Hermes detects the remote environment and prints the authorization URL instead of opening a browser. Open the URL on any device with a browser, complete the consent flow, and Hermes finishes the loopback exchange when the redirect comes back.
+
+If you need to force this behaviour explicitly:
+
+```bash
+hermes auth add xai-oauth --no-browser
+```
+
+## How the Login Works
+
+1. Hermes opens your browser to `accounts.x.ai`.
+2. You sign in (or confirm your existing session) and approve access.
+3. xAI redirects back to Hermes and the tokens are saved to `~/.hermes/auth.json`.
+4. From then on, Hermes refreshes the access token in the background — you stay signed in until you `hermes auth remove xai-oauth` or revoke access from your xAI account settings.
+
+## Checking Login Status
+
+```bash
+hermes doctor
+```
+
+The `◆ Auth Providers` section will show the current state of every provider, including `xai-oauth`.
+
+## Switching Models
+
+```bash
+hermes model
+# → Select "xAI Grok OAuth (SuperGrok Subscription)"
+# → Pick from the model list (grok-4.3 is pinned to the top)
+```
+
+Or set the model directly:
+
+```bash
+hermes config set model.default grok-4.3
+hermes config set model.provider xai-oauth
+```
+
+## Configuration Reference
+
+After login, `~/.hermes/config.yaml` will contain:
+
+```yaml
+model:
+  default: grok-4.3
+  provider: xai-oauth
+  base_url: https://api.x.ai/v1
+```
+
+### Provider aliases
+
+All of the following resolve to `xai-oauth`:
+
+```bash
+hermes --provider xai-oauth        # canonical
+hermes --provider grok-oauth       # alias
+hermes --provider x-ai-oauth       # alias
+hermes --provider xai-grok-oauth   # alias
+```
+
+## Direct-to-xAI Tools (TTS / Image / Video / Transcription)
+
+Once you're logged in via OAuth, every direct-to-xAI tool reuses the same bearer token automatically — there is **no separate setup** unless you'd rather use an API key.
+
+To pick a backend for each tool:
+
+```bash
+hermes tools
+# → Text-to-Speech       → "xAI TTS"
+# → Image Generation     → "xAI Grok Imagine (image)"
+# → Video Generation     → "xAI Grok Imagine"
+```
+
+If OAuth tokens are already stored, the picker confirms it and skips the credential prompt. If neither OAuth nor `XAI_API_KEY` is set, the picker offers a 3-choice menu: OAuth login, paste API key, or skip.
+
+:::note Video generation is off by default
+The `video_gen` toolset is disabled by default. Enable it in `hermes tools` → `🎬 Video Generation` (press space) before the agent can call `video_generate`. Otherwise the agent may fall back to the bundled ComfyUI skill, which is also tagged for video generation.
+:::
+
+### Models
+
+| Tool | Model | Notes |
+|------|-------|-------|
+| Chat | `grok-4.3` | Default; auto-selected when you log in via OAuth |
+| Chat | `grok-4.20-0309-reasoning` | Reasoning variant |
+| Chat | `grok-4.20-0309-non-reasoning` | Non-reasoning variant |
+| Chat | `grok-4.20-multi-agent-0309` | Multi-agent variant |
+| Image | `grok-imagine-image` | Default; ~5–10 s |
+| Image | `grok-imagine-image-quality` | Higher fidelity; ~10–20 s |
+| Video | `grok-imagine-video` | Text-to-video and image-to-video; up to 7 reference images |
+| TTS | (default voice) | xAI `/v1/tts` endpoint |
+
+The chat catalog is derived live from the on-disk `models.dev` cache; new xAI releases appear automatically once that cache refreshes. `grok-4.3` is always pinned to the top of the list.
+
+## Environment Variables
+
+| Variable | Effect |
+|----------|--------|
+| `XAI_BASE_URL` | Override the default `https://api.x.ai/v1` endpoint (rarely needed). |
+| `HERMES_INFERENCE_PROVIDER` | Force the active provider at runtime, e.g. `HERMES_INFERENCE_PROVIDER=xai-oauth hermes`. |
+
+## Troubleshooting
+
+### Token expired — not re-logging in automatically
+
+Hermes refreshes the token before each session and again reactively on a 401. If refresh fails with `invalid_grant` (the refresh token was revoked, or the account was rotated), Hermes surfaces a typed re-auth message instead of crashing.
+
+**Fix:** run `hermes auth add xai-oauth` again to start a fresh login.
+
+### Authorization timed out
+
+The loopback listener has a finite expiry window (default 180 s). If you don't approve the login in time, Hermes raises a timeout error.
+
+**Fix:** re-run `hermes auth add xai-oauth` (or `hermes model`). The flow starts fresh.
+
+### State mismatch (possible CSRF)
+
+Hermes detected that the `state` value returned by the authorization server doesn't match what it sent.
+
+**Fix:** re-run the login. If it persists, check for a proxy or redirect that is modifying the OAuth response.
+
+### Logging in from a remote server
+
+On SSH or container sessions Hermes prints the authorization URL instead of opening a browser. Open the URL on any device with a browser and complete the consent there — the loopback callback comes back to your remote host.
+
+You can also force this behaviour:
+
+```bash
+hermes auth add xai-oauth --no-browser
+```
+
+### "No xAI credentials found" error at runtime
+
+The auth store has no `xai-oauth` entry and no `XAI_API_KEY` is set. You haven't logged in yet, or the credential file was deleted.
+
+**Fix:** run `hermes model` and pick the xAI Grok OAuth provider, or run `hermes auth add xai-oauth`.
+
+## Logging Out
+
+To remove stored xAI Grok OAuth credentials:
+
+```bash
+hermes auth remove xai-oauth
+```
+
+This clears both the singleton `loopback_pkce` entry in `auth.json` and any matching credential-pool rows.
+
+## See Also
+
+- [AI Providers reference](../integrations/providers.md)
+- [Environment Variables](../reference/environment-variables.md)
+- [Configuration](../user-guide/configuration.md)
+- [Voice & TTS](../user-guide/features/tts.md)
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index af9e07814d7..e7b2e5ab86d 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -331,6 +331,8 @@ When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoin
 
 xAI is wired through the Responses API (`codex_responses` transport) for automatic reasoning support on Grok 4 models — no `reasoning_effort` parameter needed, the server reasons by default. Set `XAI_API_KEY` in `~/.hermes/.env` and pick xAI in `hermes model`, or drop `grok` as a shortcut into `/model grok-4-1-fast-reasoning`.
 
+SuperGrok subscribers can sign in with browser OAuth instead of using an API key — pick **xAI Grok OAuth (SuperGrok Subscription)** in `hermes model`, or run `hermes auth add xai-oauth`. The same OAuth bearer token is automatically reused by direct-to-xAI tools (TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md) for the full flow.
+
 When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically enables prompt caching by sending the `x-grok-conv-id` header with every API request. This routes requests to the same server within a conversation session, allowing xAI's infrastructure to reuse cached system prompts and conversation history.
 
 No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations.
@@ -1444,7 +1446,7 @@ fallback_model:
 
 When activated, the fallback swaps the model and provider mid-session without losing your conversation. The chain is tried entry-by-entry; activation is one-shot per session.
 
-Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `azure-foundry`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `lmstudio`, `alibaba`, `alibaba-coding-plan`, `tencent-tokenhub`, `custom`.
+Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `xai-oauth`, `ollama-cloud`, `bedrock`, `ai-gateway`, `azure-foundry`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `lmstudio`, `alibaba`, `alibaba-coding-plan`, `tencent-tokenhub`, `custom`.
 
 :::tip
 Fallback is configured exclusively through `config.yaml` — or interactively via `hermes fallback`. For full details on when it triggers, how the chain advances, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
diff --git a/website/sidebars.ts b/website/sidebars.ts
index a8d893d6e72..a0fb24b8c50 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -191,6 +191,7 @@ const sidebars: SidebarsConfig = {
         'guides/migrate-from-openclaw',
         'guides/aws-bedrock',
         'guides/azure-foundry',
+        'guides/xai-grok-oauth',
         'guides/microsoft-graph-app-registration',
         'guides/operate-teams-meeting-pipeline',
       ],

From e4d7a5dffaa18676b8567469825c2082658d8557 Mon Sep 17 00:00:00 2001
From: Jaaneek <Jaaneek@users.noreply.github.com>
Date: Fri, 15 May 2026 17:43:51 +0100
Subject: [PATCH 044/218] fix(tools): video_gen picker reflects active xAI
 selection and runs xai_grok post_setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs in the `hermes tools` reconfigure flow caused picking xAI Grok
Imagine for video_gen (or image_gen) to feel like a no-op:

1. `_is_provider_active()` had a branch for `image_gen_plugin_name` but
   none for `video_gen_plugin_name`, so a row marked as the active xAI
   video provider was never recognized as active. The picker fell through
   to the env-var fallback in `_detect_active_provider_index()`, which
   matched the FAL row (because `FAL_KEY` is set), so the picker visually
   defaulted to FAL even though the user had selected xAI.

2. `_plugin_video_gen_providers()` and `_plugin_image_gen_providers()`
   built picker rows from the plugin's `get_setup_schema()` but only
   copied `name`, `badge`, `tag`, `env_vars`. The xAI plugins declare
   `post_setup: "xai_grok"` so the picker should run the OAuth /
   API-key prompt hook after selection — that key was silently dropped,
   so the hook never fired from the picker rows.

Adds the missing `video_gen_plugin_name` branch (placed before the
`managed_nous_feature` block, mirroring the existing image_gen branch)
and propagates `post_setup` from the plugin schema into both picker-row
builders. Adds focused tests in `test_video_gen_picker.py` and
`test_image_gen_picker.py`.
---
 hermes_cli/tools_config.py                | 43 ++++++-----
 tests/hermes_cli/test_image_gen_picker.py | 27 +++++++
 tests/hermes_cli/test_video_gen_picker.py | 89 +++++++++++++++++++++++
 3 files changed, 141 insertions(+), 18 deletions(-)

diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 891ffdeb05a..377194589ea 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -1505,15 +1505,16 @@ def _plugin_image_gen_providers() -> list[dict]:
             continue
         if not isinstance(schema, dict):
             continue
-        rows.append(
-            {
-                "name": schema.get("name", provider.display_name),
-                "badge": schema.get("badge", ""),
-                "tag": schema.get("tag", ""),
-                "env_vars": schema.get("env_vars", []),
-                "image_gen_plugin_name": provider.name,
-            }
-        )
+        row = {
+            "name": schema.get("name", provider.display_name),
+            "badge": schema.get("badge", ""),
+            "tag": schema.get("tag", ""),
+            "env_vars": schema.get("env_vars", []),
+            "image_gen_plugin_name": provider.name,
+        }
+        if schema.get("post_setup"):
+            row["post_setup"] = schema["post_setup"]
+        rows.append(row)
     return rows
 
 
@@ -1542,15 +1543,16 @@ def _plugin_video_gen_providers() -> list[dict]:
             continue
         if not isinstance(schema, dict):
             continue
-        rows.append(
-            {
-                "name": schema.get("name", provider.display_name),
-                "badge": schema.get("badge", ""),
-                "tag": schema.get("tag", ""),
-                "env_vars": schema.get("env_vars", []),
-                "video_gen_plugin_name": provider.name,
-            }
-        )
+        row = {
+            "name": schema.get("name", provider.display_name),
+            "badge": schema.get("badge", ""),
+            "tag": schema.get("tag", ""),
+            "env_vars": schema.get("env_vars", []),
+            "video_gen_plugin_name": provider.name,
+        }
+        if schema.get("post_setup"):
+            row["post_setup"] = schema["post_setup"]
+        rows.append(row)
     return rows
 
 
@@ -1814,6 +1816,11 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
         image_cfg = config.get("image_gen", {})
         return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
 
+    video_plugin_name = provider.get("video_gen_plugin_name")
+    if video_plugin_name:
+        video_cfg = config.get("video_gen", {})
+        return isinstance(video_cfg, dict) and video_cfg.get("provider") == video_plugin_name
+
     managed_feature = provider.get("managed_nous_feature")
     if managed_feature:
         features = get_nous_subscription_features(config)
diff --git a/tests/hermes_cli/test_image_gen_picker.py b/tests/hermes_cli/test_image_gen_picker.py
index 6da847691a7..51eafd6da67 100644
--- a/tests/hermes_cli/test_image_gen_picker.py
+++ b/tests/hermes_cli/test_image_gen_picker.py
@@ -103,6 +103,33 @@ class TestPluginPickerInjection:
         visible = tools_config._visible_providers(browser, {})
         assert all(p.get("image_gen_plugin_name") is None for p in visible)
 
+    def test_post_setup_propagated_when_declared(self, monkeypatch):
+        from hermes_cli import tools_config
+
+        image_gen_registry.register_provider(_FakeProvider(
+            "xai_img",
+            schema={
+                "name": "xAI Grok Imagine",
+                "badge": "paid",
+                "tag": "grok image",
+                "env_vars": [],
+                "post_setup": "xai_grok",
+            },
+        ))
+
+        rows = tools_config._plugin_image_gen_providers()
+        match = next(r for r in rows if r.get("image_gen_plugin_name") == "xai_img")
+        assert match["post_setup"] == "xai_grok"
+
+    def test_post_setup_omitted_when_not_declared(self, monkeypatch):
+        from hermes_cli import tools_config
+
+        image_gen_registry.register_provider(_FakeProvider("plain_img"))
+
+        rows = tools_config._plugin_image_gen_providers()
+        match = next(r for r in rows if r.get("image_gen_plugin_name") == "plain_img")
+        assert "post_setup" not in match
+
 
 class TestPluginCatalog:
     def test_plugin_catalog_returns_models(self):
diff --git a/tests/hermes_cli/test_video_gen_picker.py b/tests/hermes_cli/test_video_gen_picker.py
index 85350947c96..c06e2ea2096 100644
--- a/tests/hermes_cli/test_video_gen_picker.py
+++ b/tests/hermes_cli/test_video_gen_picker.py
@@ -146,3 +146,92 @@ class TestReconfigureWritesProvider:
         assert config["video_gen"]["provider"] == "noenv_video"
         assert config["video_gen"]["model"] == "noenv_video-video-v1"
         assert config["video_gen"]["use_gateway"] is False
+
+
+class TestPluginVideoProvidersRow:
+    """Tests for _plugin_video_gen_providers row contents."""
+
+    def test_post_setup_propagated_when_declared(self, monkeypatch):
+        from hermes_cli import tools_config
+
+        video_gen_registry.register_provider(_FakeVideoProvider(
+            "xai_video",
+            schema={
+                "name": "xAI Grok Imagine",
+                "badge": "paid",
+                "tag": "grok video",
+                "env_vars": [],
+                "post_setup": "xai_grok",
+            },
+        ))
+
+        rows = tools_config._plugin_video_gen_providers()
+        match = next(r for r in rows if r.get("video_gen_plugin_name") == "xai_video")
+        assert match["post_setup"] == "xai_grok"
+
+    def test_post_setup_omitted_when_not_declared(self, monkeypatch):
+        from hermes_cli import tools_config
+
+        video_gen_registry.register_provider(_FakeVideoProvider("plain_video"))
+
+        rows = tools_config._plugin_video_gen_providers()
+        match = next(r for r in rows if r.get("video_gen_plugin_name") == "plain_video")
+        assert "post_setup" not in match
+
+
+class TestVideoPluginProviderActive:
+    """Tests for _is_provider_active recognizing video_gen_plugin_name."""
+
+    def test_active_when_video_gen_provider_matches(self):
+        from hermes_cli import tools_config
+
+        config = {"video_gen": {"provider": "xai"}}
+        row = {"name": "xAI Grok Imagine", "video_gen_plugin_name": "xai"}
+
+        assert tools_config._is_provider_active(row, config) is True
+
+    def test_inactive_when_video_gen_provider_differs(self):
+        from hermes_cli import tools_config
+
+        config = {"video_gen": {"provider": "fal"}}
+        row = {"name": "xAI Grok Imagine", "video_gen_plugin_name": "xai"}
+
+        assert tools_config._is_provider_active(row, config) is False
+
+    def test_inactive_when_video_gen_section_missing(self):
+        from hermes_cli import tools_config
+
+        row = {"name": "xAI Grok Imagine", "video_gen_plugin_name": "xai"}
+        assert tools_config._is_provider_active(row, {}) is False
+
+    def test_detect_active_index_picks_video_plugin_match(self, monkeypatch):
+        """When xAI is the configured video_gen provider, the picker should
+        default to the xAI row even if FAL_KEY happens to be set in env.
+
+        Regression: previously _detect_active_provider_index() saw
+        _is_provider_active(xai) return False (no video_gen branch),
+        skipped xAI (empty env_vars), and matched the FAL row via the
+        env-var fallback — so the picker visually defaulted to FAL even
+        though the user picked xAI. The xAI row uses empty env_vars
+        because authentication is handled via xAI Grok OAuth (post_setup
+        hook).
+        """
+        from hermes_cli import tools_config
+
+        monkeypatch.setattr(
+            tools_config,
+            "get_env_value",
+            lambda key: "fal-key" if key == "FAL_KEY" else "",
+        )
+
+        config = {"video_gen": {"provider": "xai"}}
+        providers = [
+            {"name": "xAI Grok Imagine", "env_vars": [], "video_gen_plugin_name": "xai"},
+            {
+                "name": "FAL.ai",
+                "env_vars": [{"key": "FAL_KEY", "prompt": "FAL"}],
+                "video_gen_plugin_name": "fal",
+            },
+        ]
+
+        assert tools_config._detect_active_provider_index(providers, config) == 0

From 9eef53b9605410ddc4fe1dfa79214a137787141c Mon Sep 17 00:00:00 2001
From: Jaaneek <Jaaneek@users.noreply.github.com>
Date: Fri, 15 May 2026 17:44:27 +0100
Subject: [PATCH 045/218] chore(release): map Jaaneek@users.noreply.github.com
 to Jaaneek

The contributor's commit author email is the legacy GitHub noreply
form (no leading numeric "id+"), so it doesn't match the
check-attribution workflow's auto-resolve regex
(\+.*@users\.noreply\.github\.com). Register it explicitly in
AUTHOR_MAP so the PR #26457 attribution check passes.
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index f3df43c3fe1..740b79091b1 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1074,6 +1074,7 @@ AUTHOR_MAP = {
     "16034932+Arkmusn@users.noreply.github.com": "Arkmusn",  # PR #25559 salvage (approvals.timeout from config)
     "nidhi2894@gmail.com": "nidhi-singh02",  # PR #2752 salvage (slack whitespace-only IndexError guard)
     "38173192+nidhi-singh02@users.noreply.github.com": "nidhi-singh02",
+    "Jaaneek@users.noreply.github.com": "Jaaneek",  # PR #26457 (xAI Grok OAuth provider)
 }
 
 
From e13c1b806018427aaf5fbe4b0ff2c6ca6821d6db Mon Sep 17 00:00:00 2001
From: Jaaneek <Jaaneek@users.noreply.github.com>
Date: Fri, 15 May 2026 18:27:54 +0100
Subject: [PATCH 046/218] fix(xai-http): preserve ~/.hermes/.env fallback and
 XAI_STT_BASE_URL precedence

The new resolve_xai_http_credentials() resolver was using os.getenv()
for the XAI_API_KEY/XAI_BASE_URL fallback path, which dropped the
~/.hermes/.env contract guarded by PR #17140 / #17163. Users with
XAI_API_KEY in dotenv only would see "No xAI credentials found" even
though the key was configured.

Separately, _transcribe_xai started consulting creds["base_url"] (which
always returns at least the default https://api.x.ai/v1) ahead of the
public XAI_STT_BASE_URL env override, so the per-tool override stopped
working.

- tools/xai_http.py: add module-level get_env_value() wrapper that
  reads ~/.hermes/.env first (via hermes_cli.config.get_env_value),
  then os.environ. Resolver uses it for the API-key/base-url fallback.
- tools/transcription_tools.py: restore precedence so XAI_STT_BASE_URL
  wins over creds["base_url"].
- tests/tools/test_transcription_dotenv_fallback.py +
  tests/tools/test_tts_dotenv_fallback.py: repoint the per-call-site
  patches at the new resolution point (tools.xai_http.get_env_value).
  The end-to-end regression-guard test (which patches load_env) is
  unchanged and still passes.
---
 .../test_transcription_dotenv_fallback.py     | 13 +++++---
 tests/tools/test_tts_dotenv_fallback.py       |  7 ++++-
 tools/transcription_tools.py                  |  2 +-
 tools/xai_http.py                             | 30 ++++++++++++++++---
 4 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/tests/tools/test_transcription_dotenv_fallback.py b/tests/tools/test_transcription_dotenv_fallback.py
index 73e7a42a59b..a28c777a8f1 100644
--- a/tests/tools/test_transcription_dotenv_fallback.py
+++ b/tests/tools/test_transcription_dotenv_fallback.py
@@ -170,7 +170,15 @@ class TestTranscribeCallSitesReadDotenv:
         assert seen_keys == ["mistral-dotenv-key"]
 
     def test_transcribe_xai_forwards_dotenv_key(self):
+        """xAI STT now resolves credentials through ``tools.xai_http`` so the
+        OAuth bearer wins when present and ``XAI_API_KEY`` is the fallback.
+        Patch the resolver's ``get_env_value`` to simulate a dotenv-only key
+        and confirm it reaches the HTTP call. The per-call-site
+        ``transcription_tools.get_env_value`` is still consulted for the
+        ``XAI_STT_BASE_URL`` override (covered by ``test_custom_base_url``).
+        """
         from tools import transcription_tools as tt
+        from tools import xai_http
 
         captured: dict = {}
 
@@ -183,15 +191,12 @@ class TestTranscribeCallSitesReadDotenv:
             response.json.return_value = {"text": "hello"}
             return response
 
-        # get_env_value is consulted for both XAI_API_KEY and XAI_STT_BASE_URL.
-        # Return the key for the first call, None for base-url override
-        # (so it defaults to the module-level XAI_STT_BASE_URL).
         def fake_get_env_value(name, default=None):
             if name == "XAI_API_KEY":
                 return "xai-dotenv-key"
             return None
 
-        with patch.object(tt, "get_env_value", side_effect=fake_get_env_value), \
+        with patch.object(xai_http, "get_env_value", side_effect=fake_get_env_value), \
              patch("requests.post", side_effect=fake_post), \
              patch("builtins.open", MagicMock()):
             result = tt._transcribe_xai("/tmp/fake.mp3", "grok-stt")
diff --git a/tests/tools/test_tts_dotenv_fallback.py b/tests/tools/test_tts_dotenv_fallback.py
index 05083208709..0a4ea5a8ac2 100644
--- a/tests/tools/test_tts_dotenv_fallback.py
+++ b/tests/tools/test_tts_dotenv_fallback.py
@@ -57,7 +57,12 @@ class TestDotenvFallbackPerProvider:
             mock_import.return_value.assert_called_once_with(api_key="el-dotenv-key")
 
     def test_xai_reads_dotenv_key(self, tmp_path):
+        """xAI TTS now resolves credentials through ``tools.xai_http``; the
+        dotenv fallback contract from #17140 is preserved by patching the
+        resolver's ``get_env_value`` rather than ``tts_tool.get_env_value``.
+        """
         from tools import tts_tool
+        from tools import xai_http
 
         captured: dict = {}
 
@@ -69,7 +74,7 @@ class TestDotenvFallbackPerProvider:
             response.raise_for_status = MagicMock()
             return response
 
-        with patch.object(tts_tool, "get_env_value", return_value="xai-dotenv-key"), \
+        with patch.object(xai_http, "get_env_value", return_value="xai-dotenv-key"), \
              patch("requests.post", side_effect=fake_post):
             tts_tool._generate_xai_tts("hi", str(tmp_path / "out.mp3"), {})
 
diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index 6f6d2f8c2a3..d741530d358 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -726,8 +726,8 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]:
     xai_config = stt_config.get("xai", {})
     base_url = str(
         xai_config.get("base_url")
-        or creds.get("base_url")
         or get_env_value("XAI_STT_BASE_URL")
+        or creds.get("base_url")
         or XAI_STT_BASE_URL
     ).strip().rstrip("/")
     language = str(
diff --git a/tools/xai_http.py b/tools/xai_http.py
index fbb7961d244..216a51ff10d 100644
--- a/tools/xai_http.py
+++ b/tools/xai_http.py
@@ -5,6 +5,25 @@ from __future__ import annotations
 import os
 from typing import Dict
 
+try:
+    from hermes_cli.config import get_env_value as _hermes_get_env_value
+except Exception:
+    _hermes_get_env_value = None
+
+
+def get_env_value(name: str, default=None):
+    """Read ``name`` from ``~/.hermes/.env`` first, then ``os.environ``.
+
+    Wraps :func:`hermes_cli.config.get_env_value` so tests can patch
+    ``tools.xai_http.get_env_value`` to inject dotenv-only secrets into the
+    xAI credential resolver.
+    """
+    if _hermes_get_env_value is not None:
+        value = _hermes_get_env_value(name)
+        if value is not None:
+            return value
+    return os.environ.get(name, default)
+
 
 def hermes_xai_user_agent() -> str:
     """Return a stable Hermes-specific User-Agent for xAI HTTP calls."""
@@ -19,8 +38,11 @@ def resolve_xai_http_credentials() -> Dict[str, str]:
     """Resolve bearer credentials for direct xAI HTTP endpoints.
 
     Prefers Hermes-managed xAI OAuth credentials when available, then falls back
-    to ``XAI_API_KEY`` from the environment. This keeps direct xAI endpoints
-    (images, TTS, STT, etc.) aligned with the main runtime auth model.
+    to ``XAI_API_KEY`` resolved via ``hermes_cli.config.get_env_value`` so keys
+    stored in ``~/.hermes/.env`` (the standard Hermes location) are honored —
+    not just ones already exported into ``os.environ``. This keeps direct xAI
+    endpoints (images, TTS, STT, etc.) aligned with the main runtime auth model
+    and preserves the regression contract from PR #17140 / #17163.
     """
     try:
         from hermes_cli.runtime_provider import resolve_runtime_provider
@@ -52,8 +74,8 @@ def resolve_xai_http_credentials() -> Dict[str, str]:
     except Exception:
         pass
 
-    api_key = os.getenv("XAI_API_KEY", "").strip()
-    base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
+    api_key = str(get_env_value("XAI_API_KEY") or "").strip()
+    base_url = str(get_env_value("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
     return {
         "provider": "xai",
         "api_key": api_key,

From 7fdc16dd4a281dad84a245ab9eed3be2f4a94264 Mon Sep 17 00:00:00 2001
From: Jaaneek <Jaaneek@users.noreply.github.com>
Date: Fri, 15 May 2026 18:28:01 +0100
Subject: [PATCH 047/218] refactor(transports/codex): trim duplicated cache-key
 comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xAI prompt_cache_key block carried two long comment paragraphs
that either restated setdefault semantics, narrated the SDK
type-validation mechanism, or recapped the historical motivation for
the extra_body indirection — all already covered by the test
docstring at test_xai_responses_sends_cache_key_via_extra_body
(which links to the xAI docs). Also restored the truncated link in
the body-injection comment.

No behavior change.
---
 agent/transports/codex.py                     | 20 ++++++-------------
 .../agent/transports/test_codex_transport.py  |  5 -----
 2 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/agent/transports/codex.py b/agent/transports/codex.py
index 46169e971ba..cfd9f128778 100644
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -102,11 +102,8 @@ class ResponsesApiTransport(ProviderTransport):
             kwargs["parallel_tool_calls"] = True
 
         session_id = params.get("session_id")
-        # xAI's Responses API uses `prompt_cache_key` (body-level) as the
-        # cache-routing key, not a top-level kwarg — the body-field
-        # injection below survives openai SDK builds whose
-        # Responses.stream() signature drops the kwarg. Everything else
-        # that ISN'T github/xAI keeps using the typed kwarg.
+        # xAI Responses takes prompt_cache_key in extra_body (set further
+        # down); GitHub Models opts out of cache-key routing entirely.
         if not is_github_responses and not is_xai_responses and session_id:
             kwargs["prompt_cache_key"] = session_id
 
@@ -172,15 +169,10 @@ class ResponsesApiTransport(ProviderTransport):
             merged_extra_headers["x-grok-conv-id"] = session_id
             kwargs["extra_headers"] = merged_extra_headers
 
-            # xAI Responses cache-routing field. Lives in the request body
-            # (per https://docs.x.ai/.../prompt-caching/maximizing-cache-hits),
-            # so we ship it via extra_body — the openai SDK serializes
-            # extra_body fields into the JSON body without per-field type
-            # validation, sidestepping the TypeError that fires on
-            # Responses.stream() builds whose `prompt_cache_key` kwarg has
-            # been dropped. Setdefault preserves a caller-supplied value
-            # (e.g. request_overrides.extra_body.prompt_cache_key) over
-            # the auto-derived session_id.
+            # xAI Responses cache-routing — body-level field per
+            # https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits.
+            # Sent via extra_body (not the typed kwarg) so it survives openai
+            # SDK builds whose Responses.stream() signature has dropped the field.
             existing_extra_body = kwargs.get("extra_body")
             merged_extra_body: Dict[str, Any] = {}
             if isinstance(existing_extra_body, dict):
diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py
index 7100e8ac17d..ad70167b09f 100644
--- a/tests/agent/transports/test_codex_transport.py
+++ b/tests/agent/transports/test_codex_transport.py
@@ -117,13 +117,8 @@ class TestCodexBuildKwargs:
             session_id="conv-xai-1",
             is_xai_responses=True,
         )
-        # Top-level prompt_cache_key must NOT be set for xAI — the SDK
-        # signature drop is what motivated the extra_body indirection in
-        # the first place. The cache-routing field must travel in the
-        # body via extra_body.
         assert "prompt_cache_key" not in kw
         assert kw.get("extra_body", {}).get("prompt_cache_key") == "conv-xai-1"
-        # Header kept as belt-and-braces.
         assert kw.get("extra_headers", {}).get("x-grok-conv-id") == "conv-xai-1"
 
     def test_xai_responses_extra_body_preserves_caller_fields(self, transport):

From 1e4801b8d0c27c1d6f6f8ed14ace0d3045a0d695 Mon Sep 17 00:00:00 2001
From: Jaaneek <Jaaneek@users.noreply.github.com>
Date: Fri, 15 May 2026 18:46:45 +0100
Subject: [PATCH 048/218] docs(xai-oauth): correct logout command (was hermes
 auth remove)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous "Logging Out" section showed `hermes auth remove xai-oauth`
with no positional target — argparse rejects that and the command does
not clear the singleton OAuth state anyway. The correct command for the
"clear everything" intent is `hermes auth logout xai-oauth`. Also point
users at `hermes auth remove xai-oauth <target>` for single-pool-row
deletion.
---
 website/docs/guides/xai-grok-oauth.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md
index 49c7087621a..5afccb6d881 100644
--- a/website/docs/guides/xai-grok-oauth.md
+++ b/website/docs/guides/xai-grok-oauth.md
@@ -198,13 +198,13 @@ The auth store has no `xai-oauth` entry and no `XAI_API_KEY` is set. You haven't
 
 ## Logging Out
 
-To remove stored xAI Grok OAuth credentials:
+To remove all stored xAI Grok OAuth credentials:
 
 ```bash
-hermes auth remove xai-oauth
+hermes auth logout xai-oauth
 ```
 
-This clears both the singleton `loopback_pkce` entry in `auth.json` and any matching credential-pool rows.
+This clears both the singleton OAuth entry in `auth.json` and any credential-pool rows for `xai-oauth`. Use `hermes auth remove xai-oauth <index|id|label>` if you only want to drop a single pool entry (run `hermes auth list xai-oauth` to see them).
 
 ## See Also
 

From 7d7cdd48e06b9bbf0fd4e030f6745e8b033e1adc Mon Sep 17 00:00:00 2001
From: Jaaneek <Jaaneek@users.noreply.github.com>
Date: Fri, 15 May 2026 19:04:14 +0100
Subject: [PATCH 049/218] test(xai-oauth): use grok-4.3 instead of retiring
 grok-code-fast-1

Per @mark-xai's review on PR #26457 and the xAI model retirement on
2026-05-15: grok-code-fast-1 is being retired today and aliases redirect
to grok-4.3 (already pinned to the top of the xAI model list by this
PR). Update the two xAI Responses-API test fixtures Mark flagged plus
the picker fallback default in hermes_cli/main.py that uses the same
literal.
---
 hermes_cli/main.py                                | 2 +-
 tests/run_agent/test_run_agent_codex_responses.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index c7ac1100816..c2c8a6880d2 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2887,7 +2887,7 @@ def _model_flow_xai_oauth(_config, current_model=""):
         pass
 
     models = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or [])
-    selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-code-fast-1"))
+    selected = _prompt_model_selection(models, current_model=current_model or (models[0] if models else "grok-4.3"))
     if selected:
         _save_model_choice(selected)
         _update_config_for_provider("xai-oauth", base_url)
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 8cc02629523..5652281eb42 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -581,7 +581,7 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
 def _build_xai_oauth_agent(monkeypatch):
     _patch_agent_bootstrap(monkeypatch)
     agent = run_agent.AIAgent(
-        model="grok-code-fast-1",
+        model="grok-4.3",
         provider="xai-oauth",
         api_mode="codex_responses",
         base_url="https://api.x.ai/v1",
@@ -619,7 +619,7 @@ def test_build_api_kwargs_xai_oauth_sends_cache_key_via_extra_body(monkeypatch):
         ]
     )
 
-    assert kwargs.get("model") == "grok-code-fast-1"
+    assert kwargs.get("model") == "grok-4.3"
     # Top-level kwarg must NOT be set — that's the openai SDK
     # incompatibility this whole indirection exists to dodge.
     assert "prompt_cache_key" not in kwargs

From aac6d97a143759731431ade9a098b4baa55fc53d Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 12:11:08 -0700
Subject: [PATCH 050/218] chore(xai-oauth): trim CORS allowlist to xAI auth
 origins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop accounts.mouseion.dev and localhost:20000 / 127.0.0.1:20000 from
the loopback callback CORS allowlist — leftover dev origins. The
redirect_uri is bound to 127.0.0.1 and gated by PKCE + state, so only
xAI's own auth origins are needed.

Co-Authored-By: Jaaneek <Jaaneek@users.noreply.github.com>
---
 hermes_cli/auth.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 8749cd9461c..c6dce709384 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -2081,12 +2081,12 @@ def _xai_validate_loopback_redirect_uri(redirect_uri: str) -> tuple[str, int, st
 
 
 def _xai_callback_cors_origin(origin: Optional[str]) -> str:
+    # CORS allowlist for the loopback callback.  Only xAI's own auth origins
+    # are accepted; the redirect_uri itself is bound to 127.0.0.1 and gated by
+    # PKCE+state, so additional dev/3p origins are not needed here.
     allowed = {
         "https://accounts.x.ai",
         "https://auth.x.ai",
-        "https://accounts.mouseion.dev",
-        "http://localhost:20000",
-        "http://127.0.0.1:20000",
     }
     return origin if origin in allowed else ""
 

From 4ad5fa702f6c04a2032be876a8d4d0b37a88459d Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 12:33:12 -0700
Subject: [PATCH 051/218] docs(xai-oauth): add xai-oauth to provider
 enumeration pages (#26542)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to #26534 (xai-oauth provider). The new guide and integrations
page were shipped with the salvage, but four reference/enumeration pages
still listed every other OAuth provider without xai-oauth:

- reference/cli-commands.md     — `--provider` choices list
- reference/environment-variables.md — HERMES_INFERENCE_PROVIDER values
- user-guide/configuration.md   — auxiliary-task provider list, OAuth
                                  tip block (mirrored from MiniMax OAuth),
                                  and provider table row
- user-guide/features/fallback-providers.md — provider table
---
 website/docs/reference/cli-commands.md                 | 2 +-
 website/docs/reference/environment-variables.md        | 2 +-
 website/docs/user-guide/configuration.md               | 7 ++++++-
 website/docs/user-guide/features/fallback-providers.md | 1 +
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index a895e1efa74..aa12f431b62 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -92,7 +92,7 @@ Common options:
 | `-q`, `--query "..."` | One-shot, non-interactive prompt. |
 | `-m`, `--model <model>` | Override the model for this run. |
 | `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
-| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `novita`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). |
+| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `google-gemini-cli`, `huggingface`, `novita`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `xai-oauth` (alias `grok-oauth`), `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry`, `lmstudio`, `stepfun`, `tencent-tokenhub` (alias `tencent`, `tokenhub`). |
 | `-s`, `--skills <name>` | Preload one or more skills for the session (can be repeated or comma-separated). |
 | `-v`, `--verbose` | Verbose output. |
 | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index 93107fba147..56fe8a13715 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -105,7 +105,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 
 | Variable | Description |
 |----------|-------------|
-| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `novita`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) |
+| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `novita`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth` (browser OAuth login — no API key required; see [MiniMax OAuth guide](../guides/minimax-oauth.md)), `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `alibaba-coding-plan` (alias `alibaba_coding`), `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `xai-oauth` (browser OAuth login for SuperGrok subscribers — no API key required; see [xAI Grok OAuth guide](../guides/xai-grok-oauth.md)), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway`, `tencent-tokenhub` (default: `auto`) |
 | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
 | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
 | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 89bdb234146..d529c8af687 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -813,12 +813,16 @@ Every model slot in Hermes — auxiliary tasks, compression, fallback — uses t
 
 When `base_url` is set, Hermes ignores the provider and calls that endpoint directly (using `api_key` or `OPENAI_API_KEY` for auth). When only `provider` is set, Hermes uses that provider's built-in auth and base URL.
 
-Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
+Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `xai-oauth`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway`, `azure-foundry` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
 
 :::tip MiniMax OAuth
 `minimax-oauth` logs in via browser OAuth (no API key needed). Run `hermes model` and select **MiniMax (OAuth)** to authenticate. Auxiliary tasks use `MiniMax-M2.7-highspeed` automatically. See the [MiniMax OAuth guide](../guides/minimax-oauth.md).
 :::
 
+:::tip xAI Grok OAuth
+`xai-oauth` logs in via browser OAuth for SuperGrok subscribers (no API key needed). Run `hermes model` and select **xAI Grok OAuth (SuperGrok Subscription)** to authenticate. The same OAuth token is reused for every direct-to-xAI surface (chat, auxiliary tasks, TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md).
+:::
+
 :::warning `"main"` is for auxiliary tasks only
 The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and `fallback_model:` configs. It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/docs/integrations/providers) for all main model provider options.
 :::
@@ -980,6 +984,7 @@ These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`,
 | `"nous"` | Force Nous Portal | `hermes auth` |
 | `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex |
 | `"minimax-oauth"` | Force MiniMax OAuth (browser login, no API key). Uses MiniMax-M2.7-highspeed for auxiliary tasks. | `hermes model` → MiniMax (OAuth) |
+| `"xai-oauth"` | Force xAI Grok OAuth (browser login for SuperGrok subscribers, no API key). Same OAuth token covers chat, TTS, image, video, and transcription. | `hermes model` → xAI Grok OAuth (SuperGrok Subscription) |
 | `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL |
 
 Direct API-key providers from the main provider catalog also work here when you want side tasks to bypass your default router. `gmi` is valid once `GMI_API_KEY` is configured:
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index cd002ae689e..72528796d55 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -66,6 +66,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) |
 | Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) |
 | xAI (Grok) | `xai` (alias `grok`) | `XAI_API_KEY` (optional: `XAI_BASE_URL`) |
+| xAI Grok OAuth (SuperGrok) | `xai-oauth` (alias `grok-oauth`) | `hermes model` → xAI Grok OAuth (browser login; SuperGrok subscription) |
 | AWS Bedrock | `bedrock` | Standard boto3 auth (`AWS_REGION` + `AWS_PROFILE` or `AWS_ACCESS_KEY_ID`) |
 | Qwen Portal (OAuth) | `qwen-oauth` | `hermes model` (Qwen Portal OAuth; optional: `HERMES_QWEN_BASE_URL`) |
 | MiniMax (OAuth) | `minimax-oauth` | `hermes model` (MiniMax portal OAuth) |

From 734aa0f367a5ace259e4c35d7b002b634a3149ae Mon Sep 17 00:00:00 2001
From: aydnOktay <xaydinoktay@gmail.com>
Date: Tue, 24 Mar 2026 13:50:11 +0300
Subject: [PATCH 052/218] fix(cronjob): require explicit truthy session env
 values

---
 tests/tools/test_cronjob_tools.py |  7 +++++++
 tools/cronjob_tools.py            | 14 +++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py
index 3e1f85c370a..34c5fede560 100644
--- a/tests/tools/test_cronjob_tools.py
+++ b/tests/tools/test_cronjob_tools.py
@@ -122,6 +122,13 @@ class TestCronjobRequirements:
 
         assert check_cronjob_requirements() is False
 
+    @pytest.mark.parametrize("false_like_value", ["0", "false", "no", "off"])
+    def test_rejects_false_like_interactive_env(self, monkeypatch, false_like_value):
+        monkeypatch.setenv("HERMES_INTERACTIVE", false_like_value)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
+        assert check_cronjob_requirements() is False
+
 
 class TestUnifiedCronjobTool:
     @pytest.fixture(autouse=True)
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 3c29431484d..698aab2cfc2 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -662,6 +662,14 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
 }
 
 
+def _is_truthy_env(var_name: str) -> bool:
+    """Return True only for explicit truthy env values."""
+    value = os.getenv(var_name)
+    if value is None:
+        return False
+    return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
 def check_cronjob_requirements() -> bool:
     """
     Check if cronjob tools can be used.
@@ -671,9 +679,9 @@ def check_cronjob_requirements() -> bool:
     so no external crontab executable is required.
     """
     return bool(
-        os.getenv("HERMES_INTERACTIVE")
-        or os.getenv("HERMES_GATEWAY_SESSION")
-        or os.getenv("HERMES_EXEC_ASK")
+        _is_truthy_env("HERMES_INTERACTIVE")
+        or _is_truthy_env("HERMES_GATEWAY_SESSION")
+        or _is_truthy_env("HERMES_EXEC_ASK")
     )
 
 
From 931caf2b2d42d6e76b8c470e5d44ca20704c41dc Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 02:03:49 -0700
Subject: [PATCH 053/218] fix(env-flags): widen truthy-only session env checks
 to sibling sites

Build on @aydnOktay's cronjob fix by routing the cronjob check through
the shared 'env_var_enabled' helper in utils.py (same truthy set:
1/true/yes/on) and applying the same semantics to the 8 sibling call
sites that read HERMES_INTERACTIVE / HERMES_GATEWAY_SESSION /
HERMES_EXEC_ASK / HERMES_CRON_SESSION with bare os.getenv() truthy
checks:

- tools/approval.py: _is_gateway_approval_context (2), check_command_safety (2),
  check_all_command_guards (3) -- 7 sites total
- tools/terminal_tool.py: _handle_sudo_failure, sudo password prompt -- 2 sites
- tools/skills_tool.py: _is_gateway_surface -- 1 site

Without this, a user who exports HERMES_INTERACTIVE=0 in their shell
still gets interactive sudo prompts, approval prompts, and gateway
skill-install paths -- only the cronjob tool was hardened. Now all
consumers agree on the same false-like values.

Also drops the duplicate _is_truthy_env helper from cronjob_tools.py
in favour of the existing canonical utils.env_var_enabled.

Tests: extend the parametrized regression coverage to all three
session env vars (HERMES_INTERACTIVE / HERMES_GATEWAY_SESSION /
HERMES_EXEC_ASK) symmetrically. tests/tools/test_cronjob_tools.py:
60/60 pass; tests/tools/{approval,terminal_tool,skills_tool,
cron_approval_mode,hardline_blocklist}.py: 378/378 pass.
---
 tests/tools/test_cronjob_tools.py | 14 ++++++++++++++
 tools/approval.py                 | 18 +++++++++---------
 tools/cronjob_tools.py            | 23 +++++++++++------------
 tools/skills_tool.py              |  3 ++-
 tools/terminal_tool.py            |  6 ++++--
 5 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py
index 34c5fede560..6280b71d29f 100644
--- a/tests/tools/test_cronjob_tools.py
+++ b/tests/tools/test_cronjob_tools.py
@@ -129,6 +129,20 @@ class TestCronjobRequirements:
         monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
         assert check_cronjob_requirements() is False
 
+    @pytest.mark.parametrize(
+        "var_name",
+        ["HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION", "HERMES_EXEC_ASK"],
+    )
+    @pytest.mark.parametrize("false_like_value", ["0", "false", "no", "off"])
+    def test_rejects_false_like_any_session_env(
+        self, monkeypatch, var_name, false_like_value
+    ):
+        """All three session env vars share the same truthy semantics."""
+        for v in ("HERMES_INTERACTIVE", "HERMES_GATEWAY_SESSION", "HERMES_EXEC_ASK"):
+            monkeypatch.delenv(v, raising=False)
+        monkeypatch.setenv(var_name, false_like_value)
+        assert check_cronjob_requirements() is False
+
 
 class TestUnifiedCronjobTool:
     @pytest.fixture(autouse=True)
diff --git a/tools/approval.py b/tools/approval.py
index dbb3810886f..84d02cc6a98 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -19,7 +19,7 @@ import unicodedata
 from typing import Optional
 from hermes_cli.config import cfg_get
 
-from utils import is_truthy_value
+from utils import env_var_enabled, is_truthy_value
 
 logger = logging.getLogger(__name__)
 
@@ -108,9 +108,9 @@ def _is_gateway_approval_context() -> bool:
     fall through to the gateway branch would submit a pending approval
     with no listener and block the job indefinitely.
     """
-    if os.getenv("HERMES_CRON_SESSION"):
+    if env_var_enabled("HERMES_CRON_SESSION"):
         return False
-    if os.getenv("HERMES_GATEWAY_SESSION"):
+    if env_var_enabled("HERMES_GATEWAY_SESSION"):
         return True
     return bool(_get_session_platform())
 
@@ -928,12 +928,12 @@ def check_dangerous_command(command: str, env_type: str,
     if is_approved(session_key, pattern_key):
         return {"approved": True, "message": None}
 
-    is_cli = os.getenv("HERMES_INTERACTIVE")
+    is_cli = env_var_enabled("HERMES_INTERACTIVE")
     is_gateway = _is_gateway_approval_context()
 
     if not is_cli and not is_gateway:
         # Cron sessions: respect cron_mode config
-        if os.getenv("HERMES_CRON_SESSION"):
+        if env_var_enabled("HERMES_CRON_SESSION"):
             if _get_cron_approval_mode() == "deny":
                 return {
                     "approved": False,
@@ -947,7 +947,7 @@ def check_dangerous_command(command: str, env_type: str,
                 }
         return {"approved": True, "message": None}
 
-    if is_gateway or os.getenv("HERMES_EXEC_ASK"):
+    if is_gateway or env_var_enabled("HERMES_EXEC_ASK"):
         submit_pending(session_key, {
             "command": command,
             "pattern_key": pattern_key,
@@ -1056,15 +1056,15 @@ def check_all_command_guards(command: str, env_type: str,
     if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled() or approval_mode == "off":
         return {"approved": True, "message": None}
 
-    is_cli = os.getenv("HERMES_INTERACTIVE")
+    is_cli = env_var_enabled("HERMES_INTERACTIVE")
     is_gateway = _is_gateway_approval_context()
-    is_ask = os.getenv("HERMES_EXEC_ASK")
+    is_ask = env_var_enabled("HERMES_EXEC_ASK")
 
     # Preserve the existing non-interactive behavior: outside CLI/gateway/ask
     # flows, we do not block on approvals and we skip external guard work.
     if not is_cli and not is_gateway and not is_ask:
         # Cron sessions: respect cron_mode config
-        if os.getenv("HERMES_CRON_SESSION"):
+        if env_var_enabled("HERMES_CRON_SESSION"):
             if _get_cron_approval_mode() == "deny":
                 # Run detection to get a description for the block message
                 is_dangerous, _pk, description = detect_dangerous_command(command)
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 698aab2cfc2..a7a8a0feab9 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -662,14 +662,6 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
 }
 
 
-def _is_truthy_env(var_name: str) -> bool:
-    """Return True only for explicit truthy env values."""
-    value = os.getenv(var_name)
-    if value is None:
-        return False
-    return value.strip().lower() in {"1", "true", "yes", "on"}
-
-
 def check_cronjob_requirements() -> bool:
     """
     Check if cronjob tools can be used.
@@ -677,11 +669,18 @@ def check_cronjob_requirements() -> bool:
     Available in interactive CLI mode and gateway/messaging platforms.
     The cron system is internal (JSON file-based scheduler ticked by the gateway),
     so no external crontab executable is required.
+
+    Session env vars must hold an explicit truthy string (``1``, ``true``,
+    ``yes``, ``on``) — false-like values (``0``, ``false``, ``no``, ``off``)
+    leave the tool disabled. Uses the shared ``env_var_enabled`` helper so
+    every consumer of these flags agrees on the truthy set.
     """
-    return bool(
-        _is_truthy_env("HERMES_INTERACTIVE")
-        or _is_truthy_env("HERMES_GATEWAY_SESSION")
-        or _is_truthy_env("HERMES_EXEC_ASK")
+    from utils import env_var_enabled
+
+    return (
+        env_var_enabled("HERMES_INTERACTIVE")
+        or env_var_enabled("HERMES_GATEWAY_SESSION")
+        or env_var_enabled("HERMES_EXEC_ASK")
     )
 
 
diff --git a/tools/skills_tool.py b/tools/skills_tool.py
index 0fcd449b80b..df6361ba59a 100644
--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@@ -78,6 +78,7 @@ from typing import Dict, Any, List, Optional, Set, Tuple
 
 from tools.registry import registry, tool_error
 from hermes_cli.config import cfg_get
+from utils import env_var_enabled
 
 logger = logging.getLogger(__name__)
 
@@ -365,7 +366,7 @@ def _capture_required_environment_variables(
 
 
 def _is_gateway_surface() -> bool:
-    if os.getenv("HERMES_GATEWAY_SESSION"):
+    if env_var_enabled("HERMES_GATEWAY_SESSION"):
         return True
     from gateway.session_context import get_session_env
     return bool(get_session_env("HERMES_SESSION_PLATFORM"))
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index e0d07e80f6e..31a1c6fa078 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -47,6 +47,8 @@ import subprocess
 from pathlib import Path
 from typing import Optional, Dict, Any, List
 
+from utils import env_var_enabled
+
 logger = logging.getLogger(__name__)
 
 
@@ -360,7 +362,7 @@ def _handle_sudo_failure(output: str, env_type: str) -> str:
     
     Returns enhanced output if sudo failed in messaging context, else original.
     """
-    is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
+    is_gateway = env_var_enabled("HERMES_GATEWAY_SESSION")
     
     if not is_gateway:
         return output
@@ -868,7 +870,7 @@ def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None
     if not has_configured_password and not sudo_password and _sudo_nopasswd_works():
         return command, None
 
-    if not has_configured_password and not sudo_password and os.getenv("HERMES_INTERACTIVE"):
+    if not has_configured_password and not sudo_password and env_var_enabled("HERMES_INTERACTIVE"):
         sudo_password = _prompt_for_sudo_password(timeout_seconds=45)
         if sudo_password:
             _set_cached_sudo_password(sudo_password)

From 4e89c53082b13b71d0c7f2f662cd65ea80d9f17c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:00:01 -0700
Subject: [PATCH 054/218] fix(async): close unscheduled coroutines in all
 threadsafe bridges (#26584)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wraps every sync->async coroutine-scheduling site in the codebase with a
new agent.async_utils.safe_schedule_threadsafe() helper that closes the
coroutine on scheduling failure (closed loop, shutdown race, etc.)
instead of leaking it as 'coroutine was never awaited' RuntimeWarnings
plus reference leaks.

22 production call sites migrated across the codebase:
- acp_adapter/events.py, acp_adapter/permissions.py
- agent/lsp/manager.py
- cron/scheduler.py (media + text delivery paths)
- gateway/platforms/feishu.py (5 sites, via existing _submit_on_loop helper
  which now delegates to safe_schedule_threadsafe)
- gateway/run.py (10 sites: telegram rename, agent:step hook, status
  callback, interim+bg-review, clarify send, exec-approval button+text,
  temp-bubble cleanup, channel-directory refresh)
- plugins/memory/hindsight, plugins/platforms/google_chat
- tools/browser_supervisor.py (3), browser_cdp_tool.py,
  computer_use/cua_backend.py, slash_confirm.py
- tools/environments/modal.py (_AsyncWorker)
- tools/mcp_tool.py (2 + 8 _run_on_mcp_loop callers converted to
  factory-style so the coroutine is never constructed on a dead loop)
- tui_gateway/ws.py

Tests: new tests/agent/test_async_utils.py covers helper behavior under
live loop, dead loop, None loop, and scheduling exceptions. Regression
tests added at three PR-original sites (acp events, acp permissions,
mcp loop runner) mirroring contributor's intent.

Live-tested end-to-end:
- Helper stress test: 1500 schedules across live/dead/race scenarios,
  zero leaked coroutines
- Race exercised: 5000 schedules with loop killed mid-flight, 100 ok /
  4900 None returns, zero leaks
- hermes chat -q with terminal tool call (exercises step_callback bridge)
- MCP probe against failing subprocess servers + factory path
- Real gateway daemon boot + SIGINT shutdown across multiple platform
  adapter inits
- WSTransport 100 live + 50 dead-loop writes
- Cron delivery path live + dead loop

Salvages PR #2657 — adopts contributor's intent over a much wider site
list and a single centralized helper instead of inline try/except at
each site. 3 of the original PR's 6 sites no longer exist on main
(environments/patches.py deleted, DingTalk refactored to native async);
the equivalent fix lives in tools/environments/modal.py instead.

Co-authored-by: JithendraNara <jithendranaidunara@gmail.com>
---
 acp_adapter/events.py                      |  13 +-
 acp_adapter/permissions.py                 |  27 ++-
 agent/async_utils.py                       |  68 +++++++
 agent/lsp/manager.py                       |   7 +-
 cron/scheduler.py                          |  39 ++--
 gateway/platforms/feishu.py                |  44 ++---
 gateway/run.py                             | 206 ++++++++++++---------
 plugins/memory/hindsight/__init__.py       |   5 +-
 plugins/platforms/google_chat/adapter.py   |  10 +-
 scripts/release.py                         |   1 +
 tests/acp/test_events.py                   |  46 +++++
 tests/acp/test_permissions.py              |  49 ++++-
 tests/agent/test_async_utils.py            | 157 ++++++++++++++++
 tests/tools/test_mcp_probe.py              |  12 +-
 tests/tools/test_mcp_structured_content.py |   3 +-
 tests/tools/test_mcp_tool.py               |  80 +++++++-
 tools/browser_cdp_tool.py                  |   8 +-
 tools/browser_supervisor.py                |  22 ++-
 tools/computer_use/cua_backend.py          |   7 +-
 tools/environments/modal.py                |   7 +-
 tools/mcp_tool.py                          |  52 ++++--
 tools/slash_confirm.py                     |   7 +-
 tui_gateway/ws.py                          |   6 +-
 23 files changed, 690 insertions(+), 186 deletions(-)
 create mode 100644 agent/async_utils.py
 create mode 100644 tests/agent/test_async_utils.py

diff --git a/acp_adapter/events.py b/acp_adapter/events.py
index 1257f902ebb..f0442ca2e8f 100644
--- a/acp_adapter/events.py
+++ b/acp_adapter/events.py
@@ -31,10 +31,17 @@ def _send_update(
     update: Any,
 ) -> None:
     """Fire-and-forget an ACP session update from a worker thread."""
+    from agent.async_utils import safe_schedule_threadsafe
+
+    future = safe_schedule_threadsafe(
+        conn.session_update(session_id, update),
+        loop,
+        logger=logger,
+        log_message="Failed to send ACP update",
+    )
+    if future is None:
+        return
     try:
-        future = asyncio.run_coroutine_threadsafe(
-            conn.session_update(session_id, update), loop
-        )
         future.result(timeout=5)
     except Exception:
         logger.debug("Failed to send ACP update", exc_info=True)
diff --git a/acp_adapter/permissions.py b/acp_adapter/permissions.py
index 44aead28742..76474e55dac 100644
--- a/acp_adapter/permissions.py
+++ b/acp_adapter/permissions.py
@@ -111,21 +111,28 @@ def make_approval_callback(
         allow_permanent: bool = True,
         **_: object,
     ) -> str:
+        from agent.async_utils import safe_schedule_threadsafe
+
         options = _build_permission_options(allow_permanent=allow_permanent)
 
-        future = None
+        tool_call = _build_permission_tool_call(command, description)
+        coro = request_permission_fn(
+            session_id=session_id,
+            tool_call=tool_call,
+            options=options,
+        )
+        future = safe_schedule_threadsafe(
+            coro, loop,
+            logger=logger,
+            log_message="Permission request: failed to schedule on loop",
+        )
+        if future is None:
+            return "deny"
+
         try:
-            tool_call = _build_permission_tool_call(command, description)
-            coro = request_permission_fn(
-                session_id=session_id,
-                tool_call=tool_call,
-                options=options,
-            )
-            future = asyncio.run_coroutine_threadsafe(coro, loop)
             response = future.result(timeout=timeout)
         except (FutureTimeout, Exception) as exc:
-            if future is not None:
-                future.cancel()
+            future.cancel()
             logger.warning("Permission request timed out or failed: %s", exc)
             return "deny"
 
diff --git a/agent/async_utils.py b/agent/async_utils.py
new file mode 100644
index 00000000000..d268e1a3a84
--- /dev/null
+++ b/agent/async_utils.py
@@ -0,0 +1,68 @@
+"""Async/sync bridging helpers.
+
+The codebase has ~30 sites that schedule a coroutine onto an event loop from a
+worker thread via :func:`asyncio.run_coroutine_threadsafe`.  That function can
+raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race),
+and when it does the coroutine object is never awaited and never closed —
+which triggers a ``"coroutine '<name>' was never awaited"`` RuntimeWarning and
+leaks the coroutine's frame until GC.
+
+:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on
+scheduling failure, and returns ``None`` (instead of a half-formed future) so
+callers can branch cleanly:
+
+    fut = safe_schedule_threadsafe(coro, loop)
+    if fut is None:
+        return  # or fallback behavior
+    fut.result(timeout=5)
+
+The helper deliberately does NOT also handle ``future.result()`` failures —
+that is a separate concern.  Once the loop has accepted the coroutine, its
+lifecycle belongs to the loop, not the scheduling thread.
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+from concurrent.futures import Future
+from typing import Any, Coroutine, Optional
+
+
+_DEFAULT_LOGGER = logging.getLogger(__name__)
+
+
+def safe_schedule_threadsafe(
+    coro: Coroutine[Any, Any, Any],
+    loop: Optional[asyncio.AbstractEventLoop],
+    *,
+    logger: Optional[logging.Logger] = None,
+    log_message: str = "Failed to schedule coroutine on loop",
+    log_level: int = logging.DEBUG,
+) -> Optional[Future]:
+    """Schedule ``coro`` on ``loop`` from a sync context, leak-safe.
+
+    Returns the :class:`concurrent.futures.Future` on success, or ``None`` if
+    the loop is missing or :func:`asyncio.run_coroutine_threadsafe` raised
+    (e.g. the loop was closed during a shutdown race).  In all failure paths
+    the coroutine is :meth:`close`-d so it does not trigger
+    ``"coroutine was never awaited"`` warnings or leak its frame.
+
+    Callers retain full control over what to do with the returned future
+    (call ``.result(timeout=...)``, attach ``add_done_callback``, ignore it
+    fire-and-forget, etc.).
+    """
+    log = logger if logger is not None else _DEFAULT_LOGGER
+
+    if loop is None:
+        if asyncio.iscoroutine(coro):
+            coro.close()
+        log.log(log_level, "%s: loop is None", log_message)
+        return None
+
+    try:
+        return asyncio.run_coroutine_threadsafe(coro, loop)
+    except Exception as exc:
+        if asyncio.iscoroutine(coro):
+            coro.close()
+        log.log(log_level, "%s: %s", log_message, exc)
+        return None
diff --git a/agent/lsp/manager.py b/agent/lsp/manager.py
index 34c0b0ba92b..7f5feaa170f 100644
--- a/agent/lsp/manager.py
+++ b/agent/lsp/manager.py
@@ -107,9 +107,14 @@ class _BackgroundLoop:
 
         Returns the coroutine's result, or raises its exception.
         """
+        from agent.async_utils import safe_schedule_threadsafe
         if self._loop is None:
+            if asyncio.iscoroutine(coro):
+                coro.close()
             raise RuntimeError("background loop not started")
-        fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        fut = safe_schedule_threadsafe(coro, self._loop)
+        if fut is None:
+            raise RuntimeError("background loop not running")
         try:
             return fut.result(timeout=timeout)
         except Exception:
diff --git a/cron/scheduler.py b/cron/scheduler.py
index b585ef2e42b..d470e8c2c74 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -464,7 +464,14 @@ def _send_media_via_adapter(
             else:
                 coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
 
-            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            from agent.async_utils import safe_schedule_threadsafe
+            future = safe_schedule_threadsafe(coro, loop)
+            if future is None:
+                logger.warning(
+                    "Job '%s': cannot send media %s, gateway loop unavailable",
+                    job.get("id", "?"), media_path,
+                )
+                return
             try:
                 result = future.result(timeout=30)
             except TimeoutError:
@@ -585,22 +592,26 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                 text_to_send = cleaned_delivery_content.strip()
                 adapter_ok = True
                 if text_to_send:
-                    future = asyncio.run_coroutine_threadsafe(
+                    from agent.async_utils import safe_schedule_threadsafe
+                    future = safe_schedule_threadsafe(
                         runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
                         loop,
                     )
-                    try:
-                        send_result = future.result(timeout=60)
-                    except TimeoutError:
-                        future.cancel()
-                        raise
-                    if send_result and not getattr(send_result, "success", True):
-                        err = getattr(send_result, "error", "unknown")
-                        logger.warning(
-                            "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
-                            job["id"], platform_name, chat_id, err,
-                        )
-                        adapter_ok = False  # fall through to standalone path
+                    if future is None:
+                        adapter_ok = False
+                    else:
+                        try:
+                            send_result = future.result(timeout=60)
+                        except TimeoutError:
+                            future.cancel()
+                            raise
+                        if send_result and not getattr(send_result, "success", True):
+                            err = getattr(send_result, "error", "unknown")
+                            logger.warning(
+                                "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
+                                job["id"], platform_name, chat_id, err,
+                            )
+                            adapter_ok = False  # fall through to standalone path
 
                 # Send extracted media files as native attachments via the live adapter
                 if adapter_ok and media_files:
diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index 8d60046d35d..a9b0447080d 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -2273,11 +2273,7 @@ class FeishuAdapter(BasePlatformAdapter):
                     daemon=True,
                 ).start()
             return
-        future = asyncio.run_coroutine_threadsafe(
-            self._handle_message_event_data(data),
-            loop,
-        )
-        future.add_done_callback(self._log_background_failure)
+        self._submit_on_loop(loop, self._handle_message_event_data(data))
 
     def _enqueue_pending_inbound_event(self, data: Any) -> bool:
         """Append an event to the pending-inbound queue.
@@ -2353,16 +2349,12 @@ class FeishuAdapter(BasePlatformAdapter):
                     dispatched = 0
                     requeue: List[Any] = []
                     for event in batch:
-                        try:
-                            fut = asyncio.run_coroutine_threadsafe(
-                                self._handle_message_event_data(event),
-                                loop,
-                            )
-                            fut.add_done_callback(self._log_background_failure)
+                        if self._submit_on_loop(
+                            loop, self._handle_message_event_data(event)
+                        ):
                             dispatched += 1
-                        except RuntimeError:
-                            # Loop closed between check and submit — requeue
-                            # and poll again.
+                        else:
+                            # Loop closed/unavailable — requeue and poll again.
                             requeue.append(event)
                     if requeue:
                         with self._pending_inbound_lock:
@@ -2466,11 +2458,10 @@ class FeishuAdapter(BasePlatformAdapter):
         if not self._loop_accepts_callbacks(loop):
             logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready")
             return
-        future = asyncio.run_coroutine_threadsafe(
-            handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
+        self._submit_on_loop(
             loop,
+            handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
         )
-        future.add_done_callback(self._log_background_failure)
 
     def _on_reaction_event(self, event_type: str, data: Any) -> None:
         """Route user reactions on bot messages as synthetic text events."""
@@ -2498,11 +2489,7 @@ class FeishuAdapter(BasePlatformAdapter):
             or bool(getattr(loop, "is_closed", lambda: False)())
         ):
             return
-        future = asyncio.run_coroutine_threadsafe(
-            self._handle_reaction_event(event_type, data),
-            loop,
-        )
-        future.add_done_callback(self._log_background_failure)
+        self._submit_on_loop(loop, self._handle_reaction_event(event_type, data))
 
     def _on_card_action_trigger(self, data: Any) -> Any:
         """Handle card-action callback from the Feishu SDK (synchronous).
@@ -2548,11 +2535,14 @@ class FeishuAdapter(BasePlatformAdapter):
 
     def _submit_on_loop(self, loop: Any, coro: Any) -> bool:
         """Schedule background work on the adapter loop with shared failure logging."""
-        try:
-            future = asyncio.run_coroutine_threadsafe(coro, loop)
-        except Exception:
-            coro.close()
-            logger.warning("[Feishu] Failed to schedule background callback work", exc_info=True)
+        from agent.async_utils import safe_schedule_threadsafe
+        future = safe_schedule_threadsafe(
+            coro, loop,
+            logger=logger,
+            log_message="[Feishu] Failed to schedule background callback work",
+            log_level=logging.WARNING,
+        )
+        if future is None:
             return False
         future.add_done_callback(self._log_background_failure)
         return True
diff --git a/gateway/run.py b/gateway/run.py
index 5e8fce8e18d..f41357673f7 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -50,6 +50,7 @@ from typing import Dict, Optional, Any, List, Union
 # gateway is a long-running daemon, so its boot cost matters less than
 # preserving the established test-patch surface.
 from agent.account_usage import fetch_account_usage, render_account_usage_lines
+from agent.async_utils import safe_schedule_threadsafe
 from agent.i18n import t
 from hermes_cli.config import cfg_get
 
@@ -11217,10 +11218,14 @@ class GatewayRunner:
             copied_source = dataclasses.replace(source)
         except Exception:
             copied_source = source
-        future = asyncio.run_coroutine_threadsafe(
+        future = safe_schedule_threadsafe(
             self._rename_telegram_topic_for_session_title(copied_source, session_id, title),
             loop,
+            logger=logger,
+            log_message="Telegram topic title rename failed to schedule",
         )
+        if future is None:
+            return
         def _log_rename_failure(fut) -> None:
             try:
                 fut.result()
@@ -14810,29 +14815,28 @@ class GatewayRunner:
         def _step_callback_sync(iteration: int, prev_tools: list) -> None:
             if not _run_still_current():
                 return
-            try:
-                # prev_tools may be list[str] or list[dict] with "name"/"result"
-                # keys.  Normalise to keep "tool_names" backward-compatible for
-                # user-authored hooks that do ', '.join(tool_names)'.
-                _names: list[str] = []
-                for _t in (prev_tools or []):
-                    if isinstance(_t, dict):
-                        _names.append(_t.get("name") or "")
-                    else:
-                        _names.append(str(_t))
-                asyncio.run_coroutine_threadsafe(
-                    _hooks_ref.emit("agent:step", {
-                        "platform": source.platform.value if source.platform else "",
-                        "user_id": source.user_id,
-                        "session_id": session_id,
-                        "iteration": iteration,
-                        "tool_names": _names,
-                        "tools": prev_tools,
-                    }),
-                    _loop_for_step,
-                )
-            except Exception as _e:
-                logger.debug("agent:step hook error: %s", _e)
+            # prev_tools may be list[str] or list[dict] with "name"/"result"
+            # keys.  Normalise to keep "tool_names" backward-compatible for
+            # user-authored hooks that do ', '.join(tool_names)'.
+            _names: list[str] = []
+            for _t in (prev_tools or []):
+                if isinstance(_t, dict):
+                    _names.append(_t.get("name") or "")
+                else:
+                    _names.append(str(_t))
+            safe_schedule_threadsafe(
+                _hooks_ref.emit("agent:step", {
+                    "platform": source.platform.value if source.platform else "",
+                    "user_id": source.user_id,
+                    "session_id": session_id,
+                    "iteration": iteration,
+                    "tool_names": _names,
+                    "tools": prev_tools,
+                }),
+                _loop_for_step,
+                logger=logger,
+                log_message="agent:step hook scheduling error",
+            )
 
         # Bridge sync status_callback → async adapter.send for context pressure
         _status_adapter = self.adapters.get(source.platform)
@@ -14852,27 +14856,28 @@ class GatewayRunner:
         def _status_callback_sync(event_type: str, message: str) -> None:
             if not _status_adapter or not _run_still_current():
                 return
-            try:
-                _fut = asyncio.run_coroutine_threadsafe(
-                    _status_adapter.send(
-                        _status_chat_id,
-                        message,
-                        metadata=_status_thread_metadata,
-                    ),
-                    _loop_for_step,
-                )
-                if _cleanup_progress:
-                    def _track_status_id(fut) -> None:
-                        try:
-                            res = fut.result()
-                        except Exception:
-                            return
-                        mid = getattr(res, "message_id", None)
-                        if getattr(res, "success", False) and mid:
-                            _cleanup_msg_ids.append(str(mid))
-                    _fut.add_done_callback(_track_status_id)
-            except Exception as _e:
-                logger.debug("status_callback error (%s): %s", event_type, _e)
+            _fut = safe_schedule_threadsafe(
+                _status_adapter.send(
+                    _status_chat_id,
+                    message,
+                    metadata=_status_thread_metadata,
+                ),
+                _loop_for_step,
+                logger=logger,
+                log_message=f"status_callback ({event_type}) scheduling error",
+            )
+            if _fut is None:
+                return
+            if _cleanup_progress:
+                def _track_status_id(fut) -> None:
+                    try:
+                        res = fut.result()
+                    except Exception:
+                        return
+                    mid = getattr(res, "message_id", None)
+                    if getattr(res, "success", False) and mid:
+                        _cleanup_msg_ids.append(str(mid))
+                _fut.add_done_callback(_track_status_id)
 
         def run_sync():
             # The conditional re-assignment of `message` further below
@@ -15026,17 +15031,16 @@ class GatewayRunner:
                     return
                 if already_streamed or not _status_adapter or not str(text or "").strip():
                     return
-                try:
-                    asyncio.run_coroutine_threadsafe(
-                        _status_adapter.send(
-                            _status_chat_id,
-                            text,
-                            metadata=_status_thread_metadata,
-                        ),
-                        _loop_for_step,
-                    )
-                except Exception as _e:
-                    logger.debug("interim_assistant_callback error: %s", _e)
+                safe_schedule_threadsafe(
+                    _status_adapter.send(
+                        _status_chat_id,
+                        text,
+                        metadata=_status_thread_metadata,
+                    ),
+                    _loop_for_step,
+                    logger=logger,
+                    log_message="interim_assistant_callback scheduling error",
+                )
 
             turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
 
@@ -15125,17 +15129,16 @@ class GatewayRunner:
             def _deliver_bg_review_message(message: str) -> None:
                 if not _status_adapter or not _run_still_current():
                     return
-                try:
-                    asyncio.run_coroutine_threadsafe(
-                        _status_adapter.send(
-                            _status_chat_id,
-                            message,
-                            metadata=_status_thread_metadata,
-                        ),
-                        _loop_for_step,
-                    )
-                except Exception as _e:
-                    logger.debug("background_review_callback error: %s", _e)
+                safe_schedule_threadsafe(
+                    _status_adapter.send(
+                        _status_chat_id,
+                        message,
+                        metadata=_status_thread_metadata,
+                    ),
+                    _loop_for_step,
+                    logger=logger,
+                    log_message="background_review_callback scheduling error",
+                )
 
             def _release_bg_review_messages() -> None:
                 _bg_review_release.set()
@@ -15207,23 +15210,28 @@ class GatewayRunner:
                     pass
 
                 send_ok = False
-                try:
-                    fut = asyncio.run_coroutine_threadsafe(
-                        _status_adapter.send_clarify(
-                            chat_id=_status_chat_id,
-                            question=question,
-                            choices=list(choices) if choices else None,
-                            clarify_id=clarify_id,
-                            session_key=session_key or "",
-                            metadata=_status_thread_metadata,
-                        ),
-                        _loop_for_step,
-                    )
-                    result = fut.result(timeout=15)
-                    send_ok = bool(getattr(result, "success", False))
-                except Exception as exc:
-                    logger.warning("Clarify send failed: %s", exc)
+                fut = safe_schedule_threadsafe(
+                    _status_adapter.send_clarify(
+                        chat_id=_status_chat_id,
+                        question=question,
+                        choices=list(choices) if choices else None,
+                        clarify_id=clarify_id,
+                        session_key=session_key or "",
+                        metadata=_status_thread_metadata,
+                    ),
+                    _loop_for_step,
+                    logger=logger,
+                    log_message="Clarify send failed to schedule",
+                )
+                if fut is None:
                     send_ok = False
+                else:
+                    try:
+                        result = fut.result(timeout=15)
+                        send_ok = bool(getattr(result, "success", False))
+                    except Exception as exc:
+                        logger.warning("Clarify send failed: %s", exc)
+                        send_ok = False
 
                 if not send_ok:
                     # Couldn't deliver the prompt — clean up and return
@@ -15343,7 +15351,7 @@ class GatewayRunner:
                 # false positives from MagicMock auto-attribute creation in tests.
                 if getattr(type(_status_adapter), "send_exec_approval", None) is not None:
                     try:
-                        _approval_result = asyncio.run_coroutine_threadsafe(
+                        _approval_fut = safe_schedule_threadsafe(
                             _status_adapter.send_exec_approval(
                                 chat_id=_status_chat_id,
                                 command=cmd,
@@ -15352,7 +15360,12 @@ class GatewayRunner:
                                 metadata=_status_thread_metadata,
                             ),
                             _loop_for_step,
-                        ).result(timeout=15)
+                            logger=logger,
+                            log_message="send_exec_approval scheduling error",
+                        )
+                        if _approval_fut is None:
+                            raise RuntimeError("send_exec_approval: loop unavailable")
+                        _approval_result = _approval_fut.result(timeout=15)
                         if _approval_result.success:
                             return
                         logger.warning(
@@ -15374,14 +15387,18 @@ class GatewayRunner:
                     f"for the session, `/approve always` to approve permanently, or `/deny` to cancel."
                 )
                 try:
-                    asyncio.run_coroutine_threadsafe(
+                    _approval_send_fut = safe_schedule_threadsafe(
                         _status_adapter.send(
                             _status_chat_id,
                             msg,
                             metadata=_status_thread_metadata,
                         ),
                         _loop_for_step,
-                    ).result(timeout=15)
+                        logger=logger,
+                        log_message="Approval text-send scheduling error",
+                    )
+                    if _approval_send_fut is not None:
+                        _approval_send_fut.result(timeout=15)
                 except Exception as _e:
                     logger.error("Failed to send approval request: %s", _e)
 
@@ -16343,7 +16360,11 @@ class GatewayRunner:
                         except Exception:
                             pass
                 try:
-                    asyncio.run_coroutine_threadsafe(_delete_all(), _loop_snapshot)
+                    safe_schedule_threadsafe(
+                        _delete_all(), _loop_snapshot,
+                        logger=logger,
+                        log_message="Temp bubble cleanup scheduling error",
+                    )
                 except Exception:
                     pass
 
@@ -16400,10 +16421,13 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
                     # this ticker runs in a background thread. Schedule onto
                     # the gateway event loop and wait briefly for completion
                     # so refresh failures are still logged via the except.
-                    fut = asyncio.run_coroutine_threadsafe(
-                        build_channel_directory(adapters), loop
+                    fut = safe_schedule_threadsafe(
+                        build_channel_directory(adapters), loop,
+                        logger=logger,
+                        log_message="Channel directory refresh scheduling error",
                     )
-                    fut.result(timeout=30)
+                    if fut is not None:
+                        fut.result(timeout=30)
             except Exception as e:
                 logger.debug("Channel directory refresh error: %s", e)
 
diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py
index 3a42a320453..52b1ac247f1 100644
--- a/plugins/memory/hindsight/__init__.py
+++ b/plugins/memory/hindsight/__init__.py
@@ -221,8 +221,11 @@ def _get_loop() -> asyncio.AbstractEventLoop:
 
 def _run_sync(coro, timeout: float = _DEFAULT_TIMEOUT):
     """Schedule *coro* on the shared loop and block until done."""
+    from agent.async_utils import safe_schedule_threadsafe
     loop = _get_loop()
-    future = asyncio.run_coroutine_threadsafe(coro, loop)
+    future = safe_schedule_threadsafe(coro, loop)
+    if future is None:
+        raise RuntimeError("Hindsight loop unavailable")
     return future.result(timeout=timeout)
 
 
diff --git a/plugins/platforms/google_chat/adapter.py b/plugins/platforms/google_chat/adapter.py
index 1d58e801f46..d8777bf7101 100644
--- a/plugins/platforms/google_chat/adapter.py
+++ b/plugins/platforms/google_chat/adapter.py
@@ -670,10 +670,18 @@ class GoogleChatAdapter(BasePlatformAdapter):
             logger.warning("[GoogleChat] Loop not accepting callbacks; dropping event")
             return
         try:
-            future = asyncio.run_coroutine_threadsafe(coro, loop)
+            from agent.async_utils import safe_schedule_threadsafe
+            future = safe_schedule_threadsafe(
+                coro, loop,
+                logger=logger,
+                log_message="[GoogleChat] Failed to schedule background callback",
+                log_level=logging.WARNING,
+            )
         except RuntimeError:
             logger.warning("[GoogleChat] Loop closed between check and submit")
             return
+        if future is None:
+            return
         future.add_done_callback(self._log_background_failure)
 
     # ------------------------------------------------------------------
diff --git a/scripts/release.py b/scripts/release.py
index 740b79091b1..c9cd9c173c0 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -62,6 +62,7 @@ AUTHOR_MAP = {
     "nidhi2894@gmail.com": "nidhi-singh02",
     "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
     "oleksii.lisikh@gmail.com": "olisikh",
+    "jithendranaidunara@gmail.com": "JithendraNara",
     "jeremy@geocaching.com": "outdoorsea",
     "leone.parise@gmail.com": "leoneparise",
     "mr@shu.io": "mrshu",
diff --git a/tests/acp/test_events.py b/tests/acp/test_events.py
index c9f91a181ed..56a2687226c 100644
--- a/tests/acp/test_events.py
+++ b/tests/acp/test_events.py
@@ -1,6 +1,8 @@
 """Tests for acp_adapter.events — callback factories for ACP notifications."""
 
 import asyncio
+import gc
+import warnings
 from concurrent.futures import Future
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -10,6 +12,7 @@ import acp
 from acp.schema import ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
 
 from acp_adapter.events import (
+    _send_update,
     make_message_cb,
     make_step_cb,
     make_thinking_cb,
@@ -325,3 +328,46 @@ class TestMessageCallback:
             cb("")
 
         mock_rcts.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Scheduler-failure regression
+# ---------------------------------------------------------------------------
+
+class TestSendUpdate:
+    def test_scheduler_failure_closes_update_coroutine(self, event_loop_fixture):
+        """If run_coroutine_threadsafe raises, _send_update must close the coro."""
+        created = {"coro": None}
+
+        async def _session_update(session_id, update):
+            return None
+
+        conn = MagicMock()
+
+        def _capture_update(session_id, update):
+            created["coro"] = _session_update(session_id, update)
+            return created["coro"]
+
+        conn.session_update = _capture_update
+
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            with patch(
+                "agent.async_utils.asyncio.run_coroutine_threadsafe",
+                side_effect=RuntimeError("scheduler down"),
+            ):
+                _send_update(conn, "session-1", event_loop_fixture, {"type": "noop"})
+            gc.collect()
+
+        assert created["coro"] is not None
+        assert created["coro"].cr_frame is None
+        # Only count warnings about THIS test's coroutine; other tests in the
+        # same xdist worker (or stdlib mock internals) may emit unrelated
+        # "coroutine was never awaited" warnings that bleed through.
+        runtime_warnings = [
+            w for w in caught
+            if issubclass(w.category, RuntimeWarning)
+            and "was never awaited" in str(w.message)
+            and "_session_update" in str(w.message)
+        ]
+        assert runtime_warnings == []
diff --git a/tests/acp/test_permissions.py b/tests/acp/test_permissions.py
index 8bbdeeb392a..b4c121829dc 100644
--- a/tests/acp/test_permissions.py
+++ b/tests/acp/test_permissions.py
@@ -38,7 +38,7 @@ def _invoke_callback(
         scheduled["loop"] = passed_loop
         return future
 
-    with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
+    with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
         cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=timeout)
         if use_prompt_path:
             result = prompt_dangerous_approval(
@@ -135,7 +135,7 @@ class TestApprovalBridge:
             scheduled["loop"] = passed_loop
             return future
 
-        with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
+        with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
             cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=0.01)
             result = cb("rm -rf /", "dangerous command")
 
@@ -159,10 +159,53 @@ class TestApprovalBridge:
             scheduled["loop"] = passed_loop
             return future
 
-        with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
+        with patch("agent.async_utils.asyncio.run_coroutine_threadsafe", side_effect=_schedule):
             cb = make_approval_callback(request_permission, loop, session_id="s1", timeout=1.0)
             result = cb("echo hi", "demo")
 
         scheduled["coro"].close()
 
         assert result == "deny"
+
+
+# ---------------------------------------------------------------------------
+# Scheduler-failure regression
+# ---------------------------------------------------------------------------
+
+import gc  # noqa: E402
+import warnings  # noqa: E402
+
+
+class TestSchedulerFailure:
+    def test_scheduler_failure_closes_permission_coroutine(self):
+        """If run_coroutine_threadsafe raises, the coro is closed and we return 'deny'."""
+        loop = MagicMock(spec=asyncio.AbstractEventLoop)
+        created = {"coro": None}
+
+        async def _response_coro(**kwargs):
+            return _make_response(AllowedOutcome(option_id="allow_once", outcome="selected"))
+
+        def _request_permission(**kwargs):
+            created["coro"] = _response_coro(**kwargs)
+            return created["coro"]
+
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            with patch(
+                "agent.async_utils.asyncio.run_coroutine_threadsafe",
+                side_effect=RuntimeError("scheduler down"),
+            ):
+                cb = make_approval_callback(_request_permission, loop, session_id="s1", timeout=0.01)
+                result = cb("rm -rf /", "dangerous")
+            gc.collect()
+
+        assert result == "deny"
+        assert created["coro"] is not None
+        assert created["coro"].cr_frame is None
+        runtime_warnings = [
+            w for w in caught
+            if issubclass(w.category, RuntimeWarning)
+            and "was never awaited" in str(w.message)
+            and "_response_coro" in str(w.message)
+        ]
+        assert runtime_warnings == []
diff --git a/tests/agent/test_async_utils.py b/tests/agent/test_async_utils.py
new file mode 100644
index 00000000000..33ce84ee0c6
--- /dev/null
+++ b/tests/agent/test_async_utils.py
@@ -0,0 +1,157 @@
+"""Tests for agent.async_utils.safe_schedule_threadsafe."""
+
+from __future__ import annotations
+
+import asyncio
+import gc
+import warnings
+from concurrent.futures import Future
+from unittest.mock import patch
+
+import pytest
+
+from agent.async_utils import safe_schedule_threadsafe
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _no_unawaited_warnings(caught, *, coro_name: str = "") -> bool:
+    """Return True if no "X was never awaited" warning slipped through.
+
+    When *coro_name* is provided, only warnings naming that coroutine are
+    counted — xdist workers may emit unrelated unawaited-coroutine warnings
+    (e.g. ``AsyncMockMixin._execute_mock_call``) from concurrent tests.
+    """
+    bad = [
+        w for w in caught
+        if issubclass(w.category, RuntimeWarning)
+        and "was never awaited" in str(w.message)
+        and (not coro_name or coro_name in str(w.message))
+    ]
+    return not bad
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestSafeScheduleThreadsafe:
+    def test_returns_future_on_success(self):
+        loop = asyncio.new_event_loop()
+        try:
+            import threading
+            ready = threading.Event()
+            stop = threading.Event()
+
+            def _runner():
+                asyncio.set_event_loop(loop)
+                ready.set()
+                loop.run_until_complete(_wait_for_stop(stop))
+
+            async def _wait_for_stop(ev):
+                while not ev.is_set():
+                    await asyncio.sleep(0.005)
+
+            t = threading.Thread(target=_runner, daemon=True)
+            t.start()
+            ready.wait(timeout=2)
+
+            async def _sample():
+                return 42
+
+            fut = safe_schedule_threadsafe(_sample(), loop)
+            assert isinstance(fut, Future)
+            assert fut.result(timeout=2) == 42
+
+            stop.set()
+            t.join(timeout=2)
+        finally:
+            if loop.is_running():
+                loop.call_soon_threadsafe(loop.stop)
+            loop.close()
+
+    def test_closed_loop_returns_none_and_closes_coroutine(self):
+        loop = asyncio.new_event_loop()
+        loop.close()
+
+        async def _sample():
+            return "ok"
+
+        coro = _sample()
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            result = safe_schedule_threadsafe(coro, loop)
+            del coro
+            gc.collect()
+
+        assert result is None
+        assert _no_unawaited_warnings(caught, coro_name='_sample')
+
+    def test_none_loop_returns_none_and_closes_coroutine(self):
+        async def _sample():
+            return "ok"
+
+        coro = _sample()
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            result = safe_schedule_threadsafe(coro, None)
+            del coro
+            gc.collect()
+
+        assert result is None
+        assert _no_unawaited_warnings(caught, coro_name='_sample')
+
+    def test_scheduling_exception_closes_coroutine(self):
+        """If run_coroutine_threadsafe raises, close the coroutine and return None."""
+        # A loop that *looks* open but raises on submission
+        loop = asyncio.new_event_loop()
+        try:
+            async def _sample():
+                return "ok"
+
+            coro = _sample()
+            with warnings.catch_warnings(record=True) as caught:
+                warnings.simplefilter("always")
+                with patch(
+                    "agent.async_utils.asyncio.run_coroutine_threadsafe",
+                    side_effect=RuntimeError("scheduler down"),
+                ):
+                    result = safe_schedule_threadsafe(coro, loop)
+                del coro
+                gc.collect()
+
+            assert result is None
+            assert _no_unawaited_warnings(caught, coro_name='_sample')
+        finally:
+            loop.close()
+
+    def test_logs_at_specified_level(self, caplog):
+        import logging
+        loop = asyncio.new_event_loop()
+        loop.close()
+
+        async def _sample():
+            return None
+
+        custom = logging.getLogger("test_async_utils")
+        with caplog.at_level(logging.WARNING, logger="test_async_utils"):
+            result = safe_schedule_threadsafe(
+                _sample(), loop,
+                logger=custom,
+                log_message="custom-msg",
+                log_level=logging.WARNING,
+            )
+
+        assert result is None
+        assert any("custom-msg" in rec.message for rec in caplog.records)
+
+    def test_non_coroutine_arg_does_not_crash(self):
+        """Defensive: even if the caller hands us something weird, don't blow up."""
+        loop = asyncio.new_event_loop()
+        loop.close()
+
+        # Pass a non-coroutine sentinel
+        result = safe_schedule_threadsafe("not-a-coroutine", loop)  # type: ignore[arg-type]
+        assert result is None
diff --git a/tests/tools/test_mcp_probe.py b/tests/tools/test_mcp_probe.py
index 46459e44c87..89d4d1478d1 100644
--- a/tests/tools/test_mcp_probe.py
+++ b/tests/tools/test_mcp_probe.py
@@ -69,7 +69,8 @@ class TestProbeMcpServerTools:
              patch("tools.mcp_tool._stop_mcp_loop"):
 
             # Simulate running the async probe
-            def run_coro(coro, timeout=120):
+            def run_coro(coro_or_factory, timeout=120):
+                coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
                 loop = asyncio.new_event_loop()
                 try:
                     return loop.run_until_complete(coro)
@@ -110,7 +111,8 @@ class TestProbeMcpServerTools:
              patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
              patch("tools.mcp_tool._stop_mcp_loop"):
 
-            def run_coro(coro, timeout=120):
+            def run_coro(coro_or_factory, timeout=120):
+                coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
                 loop = asyncio.new_event_loop()
                 try:
                     return loop.run_until_complete(coro)
@@ -144,7 +146,8 @@ class TestProbeMcpServerTools:
              patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
              patch("tools.mcp_tool._stop_mcp_loop"):
 
-            def run_coro(coro, timeout=120):
+            def run_coro(coro_or_factory, timeout=120):
+                coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
                 loop = asyncio.new_event_loop()
                 try:
                     return loop.run_until_complete(coro)
@@ -198,7 +201,8 @@ class TestProbeMcpServerTools:
              patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
              patch("tools.mcp_tool._stop_mcp_loop"):
 
-            def run_coro(coro, timeout=120):
+            def run_coro(coro_or_factory, timeout=120):
+                coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
                 loop = asyncio.new_event_loop()
                 try:
                     return loop.run_until_complete(coro)
diff --git a/tests/tools/test_mcp_structured_content.py b/tests/tools/test_mcp_structured_content.py
index 2870ce1e860..f4cda00f9f0 100644
--- a/tests/tools/test_mcp_structured_content.py
+++ b/tests/tools/test_mcp_structured_content.py
@@ -31,7 +31,8 @@ class _FakeCallToolResult:
         self.structuredContent = structuredContent
 
 
-def _fake_run_on_mcp_loop(coro, timeout=30):
+def _fake_run_on_mcp_loop(coro_or_factory, timeout=30):
+    coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
     """Run an MCP coroutine directly in a fresh event loop."""
     loop = asyncio.new_event_loop()
     try:
diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py
index 5558a0df48c..7f6c3f6704c 100644
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@@ -397,6 +397,77 @@ class TestCheckFunction:
             _servers.pop("test_server", None)
 
 
+# ---------------------------------------------------------------------------
+# MCP loop runner
+# ---------------------------------------------------------------------------
+
+class TestRunOnMcpLoop:
+    def test_scheduler_failure_closes_factory_coroutine(self):
+        """If run_coroutine_threadsafe raises, the factory's coroutine is closed."""
+        import gc
+        import warnings
+        import tools.mcp_tool as mcp
+
+        created = {"coro": None}
+
+        async def _sample():
+            return "ok"
+
+        def factory():
+            created["coro"] = _sample()
+            return created["coro"]
+
+        fake_loop = MagicMock()
+        fake_loop.is_running.return_value = True
+
+        with patch.object(mcp, "_mcp_loop", fake_loop):
+            with warnings.catch_warnings(record=True) as caught:
+                warnings.simplefilter("always")
+                with patch(
+                    "agent.async_utils.asyncio.run_coroutine_threadsafe",
+                    side_effect=RuntimeError("scheduler down"),
+                ):
+                    with pytest.raises(RuntimeError):
+                        mcp._run_on_mcp_loop(factory)
+                gc.collect()
+
+        assert created["coro"] is not None
+        assert created["coro"].cr_frame is None
+        runtime_warnings = [
+            w for w in caught
+            if issubclass(w.category, RuntimeWarning)
+            and "was never awaited" in str(w.message)
+            and "_sample" in str(w.message)
+        ]
+        assert runtime_warnings == []
+
+    def test_dead_loop_closes_passed_coroutine(self):
+        """If loop is None, a passed coroutine (not factory) is closed."""
+        import gc
+        import warnings
+        import tools.mcp_tool as mcp
+
+        async def _sample():
+            return "ok"
+
+        coro = _sample()
+        with patch.object(mcp, "_mcp_loop", None):
+            with warnings.catch_warnings(record=True) as caught:
+                warnings.simplefilter("always")
+                with pytest.raises(RuntimeError, match="not running"):
+                    mcp._run_on_mcp_loop(coro)
+                gc.collect()
+
+        assert coro.cr_frame is None
+        runtime_warnings = [
+            w for w in caught
+            if issubclass(w.category, RuntimeWarning)
+            and "was never awaited" in str(w.message)
+            and "_sample" in str(w.message)
+        ]
+        assert runtime_warnings == []
+
+
 # ---------------------------------------------------------------------------
 # Tool handler
 # ---------------------------------------------------------------------------
@@ -406,7 +477,8 @@ class TestToolHandler:
 
     def _patch_mcp_loop(self, coro_side_effect=None):
         """Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
-        def fake_run(coro, timeout=30):
+        def fake_run(coro_or_factory, timeout=30):
+            coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
             return asyncio.run(coro)
         if coro_side_effect:
             return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=coro_side_effect)
@@ -485,7 +557,8 @@ class TestToolHandler:
 
         try:
             handler = _make_tool_handler("test_srv", "greet", 120)
-            def _interrupting_run(coro, timeout=30):
+            def _interrupting_run(coro_or_factory, timeout=30):
+                coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
                 coro.close()
                 raise InterruptedError("User sent a new message")
             with patch(
@@ -1792,7 +1865,8 @@ class TestUtilityHandlers:
 
     def _patch_mcp_loop(self):
         """Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
-        def fake_run(coro, timeout=30):
+        def fake_run(coro_or_factory, timeout=30):
+            coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
             return asyncio.run(coro)
         return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=fake_run)
 
diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py
index 8e829556a57..f10a1541923 100644
--- a/tools/browser_cdp_tool.py
+++ b/tools/browser_cdp_tool.py
@@ -274,7 +274,13 @@ def _browser_cdp_via_supervisor(
         )
 
     try:
-        fut = _asyncio.run_coroutine_threadsafe(_do_cdp(), loop)
+        from agent.async_utils import safe_schedule_threadsafe
+        fut = safe_schedule_threadsafe(_do_cdp(), loop)
+        if fut is None:
+            return tool_error(
+                "CDP call via supervisor failed: loop unavailable",
+                cdp_docs=CDP_DOCS_URL,
+            )
         result_msg = fut.result(timeout=timeout + 2)
     except Exception as exc:
         return tool_error(
diff --git a/tools/browser_supervisor.py b/tools/browser_supervisor.py
index af8d40ee185..73dd3e51bb5 100644
--- a/tools/browser_supervisor.py
+++ b/tools/browser_supervisor.py
@@ -368,11 +368,13 @@ class CDPSupervisor:
                         pass
 
             try:
-                fut = asyncio.run_coroutine_threadsafe(_close_ws(), loop)
-                try:
-                    fut.result(timeout=2.0)
-                except Exception:
-                    pass
+                from agent.async_utils import safe_schedule_threadsafe
+                fut = safe_schedule_threadsafe(_close_ws(), loop)
+                if fut is not None:
+                    try:
+                        fut.result(timeout=2.0)
+                    except Exception:
+                        pass
             except RuntimeError:
                 pass  # loop already shutting down
         if self._thread is not None:
@@ -451,7 +453,10 @@ class CDPSupervisor:
             )
 
         try:
-            fut = asyncio.run_coroutine_threadsafe(_do_respond(), loop)
+            from agent.async_utils import safe_schedule_threadsafe
+            fut = safe_schedule_threadsafe(_do_respond(), loop)
+            if fut is None:
+                return {"ok": False, "error": "Browser supervisor loop unavailable"}
             fut.result(timeout=timeout)
         except Exception as e:
             return {"ok": False, "error": f"{type(e).__name__}: {e}"}
@@ -507,7 +512,10 @@ class CDPSupervisor:
             )
 
         try:
-            fut = asyncio.run_coroutine_threadsafe(_do_eval(), loop)
+            from agent.async_utils import safe_schedule_threadsafe
+            fut = safe_schedule_threadsafe(_do_eval(), loop)
+            if fut is None:
+                return {"ok": False, "error": "Browser supervisor loop unavailable"}
             response = fut.result(timeout=timeout + 1)
         except Exception as exc:
             return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
diff --git a/tools/computer_use/cua_backend.py b/tools/computer_use/cua_backend.py
index df1162c5d79..96aab60f8c7 100644
--- a/tools/computer_use/cua_backend.py
+++ b/tools/computer_use/cua_backend.py
@@ -183,9 +183,14 @@ class _AsyncBridge:
             raise RuntimeError("cua-driver asyncio bridge failed to start")
 
     def run(self, coro, timeout: Optional[float] = 30.0) -> Any:
+        from agent.async_utils import safe_schedule_threadsafe
         if not self._loop or not self._thread or not self._thread.is_alive():
+            if asyncio.iscoroutine(coro):
+                coro.close()
+            raise RuntimeError("cua-driver bridge not started")
+        fut = safe_schedule_threadsafe(coro, self._loop)
+        if fut is None:
             raise RuntimeError("cua-driver bridge not started")
-        fut: Future = asyncio.run_coroutine_threadsafe(coro, self._loop)
         return fut.result(timeout=timeout)
 
     def stop(self) -> None:
diff --git a/tools/environments/modal.py b/tools/environments/modal.py
index 1a230d85603..3137b322113 100644
--- a/tools/environments/modal.py
+++ b/tools/environments/modal.py
@@ -144,9 +144,14 @@ class _AsyncWorker:
         self._loop.run_forever()
 
     def run_coroutine(self, coro, timeout=600):
+        from agent.async_utils import safe_schedule_threadsafe
         if self._loop is None or self._loop.is_closed():
+            if asyncio.iscoroutine(coro):
+                coro.close()
+            raise RuntimeError("AsyncWorker loop is not running")
+        future = safe_schedule_threadsafe(coro, self._loop)
+        if future is None:
             raise RuntimeError("AsyncWorker loop is not running")
-        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
         return future.result(timeout=timeout)
 
     def stop(self):
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index c2668395e5d..ba104cc4273 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -1781,7 +1781,7 @@ def _handle_auth_error_and_retry(
         return await manager.handle_401(server_name, None)
 
     try:
-        recovered = _run_on_mcp_loop(_recover(), timeout=10)
+        recovered = _run_on_mcp_loop(_recover, timeout=10)
     except Exception as rec_exc:
         logger.warning(
             "MCP OAuth '%s': recovery attempt failed: %s",
@@ -2054,19 +2054,35 @@ def _ensure_mcp_loop():
         _mcp_thread.start()
 
 
-def _run_on_mcp_loop(coro, timeout: float = 30):
+def _run_on_mcp_loop(coro_or_factory, timeout: float = 30):
     """Schedule a coroutine on the MCP event loop and block until done.
 
+    Accepts either a coroutine object or a zero-arg callable that returns one.
+    Callers can pass a factory to avoid constructing coroutine objects when
+    the MCP loop is unavailable (which would otherwise leak the coroutine
+    frame and emit ``"coroutine was never awaited"`` warnings).
+
     Poll in short intervals so the calling agent thread can honor user
     interrupts while the MCP work is still running on the background loop.
     """
     from tools.interrupt import is_interrupted
+    from agent.async_utils import safe_schedule_threadsafe
 
     with _lock:
         loop = _mcp_loop
     if loop is None or not loop.is_running():
+        if asyncio.iscoroutine(coro_or_factory):
+            coro_or_factory.close()
         raise RuntimeError("MCP event loop is not running")
-    future = asyncio.run_coroutine_threadsafe(coro, loop)
+
+    coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
+    future = safe_schedule_threadsafe(
+        coro, loop,
+        logger=logger,
+        log_message="MCP scheduling failed",
+    )
+    if future is None:
+        raise RuntimeError("MCP event loop unavailable (failed to schedule)")
     start_time = time.monotonic()
     deadline = None if timeout is None else start_time + timeout
 
@@ -2263,7 +2279,7 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
             return json.dumps({"result": text_result}, ensure_ascii=False)
 
         def _call_once():
-            return _run_on_mcp_loop(_call(), timeout=tool_timeout)
+            return _run_on_mcp_loop(_call, timeout=tool_timeout)
 
         try:
             result = _call_once()
@@ -2343,7 +2359,7 @@ def _make_list_resources_handler(server_name: str, tool_timeout: float):
             return json.dumps({"resources": resources}, ensure_ascii=False)
 
         def _call_once():
-            return _run_on_mcp_loop(_call(), timeout=tool_timeout)
+            return _run_on_mcp_loop(_call, timeout=tool_timeout)
 
         try:
             return _call_once()
@@ -2403,7 +2419,7 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float):
             return json.dumps({"result": "\n".join(parts) if parts else ""}, ensure_ascii=False)
 
         def _call_once():
-            return _run_on_mcp_loop(_call(), timeout=tool_timeout)
+            return _run_on_mcp_loop(_call, timeout=tool_timeout)
 
         try:
             return _call_once()
@@ -2466,7 +2482,7 @@ def _make_list_prompts_handler(server_name: str, tool_timeout: float):
             return json.dumps({"prompts": prompts}, ensure_ascii=False)
 
         def _call_once():
-            return _run_on_mcp_loop(_call(), timeout=tool_timeout)
+            return _run_on_mcp_loop(_call, timeout=tool_timeout)
 
         try:
             return _call_once()
@@ -2537,7 +2553,7 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float):
             return json.dumps(resp, ensure_ascii=False)
 
         def _call_once():
-            return _run_on_mcp_loop(_call(), timeout=tool_timeout)
+            return _run_on_mcp_loop(_call, timeout=tool_timeout)
 
         try:
             return _call_once()
@@ -3121,7 +3137,7 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
     if _was_interrupted:
         _set_interrupt(False)
     try:
-        _run_on_mcp_loop(_discover_all(), timeout=120)
+        _run_on_mcp_loop(_discover_all, timeout=120)
     finally:
         if _was_interrupted:
             _set_interrupt(True)
@@ -3289,7 +3305,7 @@ def probe_mcp_server_tools() -> Dict[str, List[tuple]]:
         )
 
     try:
-        _run_on_mcp_loop(_probe_all(), timeout=120)
+        _run_on_mcp_loop(_probe_all, timeout=120)
     except Exception as exc:
         logger.debug("MCP probe failed: %s", exc)
     finally:
@@ -3329,11 +3345,17 @@ def shutdown_mcp_servers():
     with _lock:
         loop = _mcp_loop
     if loop is not None and loop.is_running():
-        try:
-            future = asyncio.run_coroutine_threadsafe(_shutdown(), loop)
-            future.result(timeout=15)
-        except Exception as exc:
-            logger.debug("Error during MCP shutdown: %s", exc)
+        from agent.async_utils import safe_schedule_threadsafe
+        future = safe_schedule_threadsafe(
+            _shutdown(), loop,
+            logger=logger,
+            log_message="MCP shutdown: failed to schedule",
+        )
+        if future is not None:
+            try:
+                future.result(timeout=15)
+            except Exception as exc:
+                logger.debug("Error during MCP shutdown: %s", exc)
 
     _stop_mcp_loop()
 
diff --git a/tools/slash_confirm.py b/tools/slash_confirm.py
index 81c15263527..21db18fe319 100644
--- a/tools/slash_confirm.py
+++ b/tools/slash_confirm.py
@@ -153,9 +153,14 @@ def resolve_sync_compat(
     Prefer the async ``resolve()`` from an async context.
     """
     try:
-        fut = asyncio.run_coroutine_threadsafe(
+        from agent.async_utils import safe_schedule_threadsafe
+        fut = safe_schedule_threadsafe(
             resolve(session_key, confirm_id, choice), loop,
+            logger=logger,
+            log_message="resolve_sync_compat scheduling failed",
         )
+        if fut is None:
+            return None
         return fut.result(timeout=30)
     except Exception as exc:
         logger.error("resolve_sync_compat failed: %s", exc)
diff --git a/tui_gateway/ws.py b/tui_gateway/ws.py
index 1661811dbd6..a5879ef3a1c 100644
--- a/tui_gateway/ws.py
+++ b/tui_gateway/ws.py
@@ -83,7 +83,11 @@ class WSTransport:
             return True
 
         try:
-            fut = asyncio.run_coroutine_threadsafe(self._safe_send(line), self._loop)
+            from agent.async_utils import safe_schedule_threadsafe
+            fut = safe_schedule_threadsafe(self._safe_send(line), self._loop)
+            if fut is None:
+                self._closed = True
+                return False
             fut.result(timeout=_WS_WRITE_TIMEOUT_S)
             return not self._closed
         except Exception as exc:

From 13c3d4b4efa2f39d7bc3178cf3eca77167ff7699 Mon Sep 17 00:00:00 2001
From: kchantharuan <kchantharuan@nvidia.com>
Date: Wed, 13 May 2026 12:46:07 -0700
Subject: [PATCH 055/218] feat(nvidia): add NIM billing origin header

---
 agent/auxiliary_client.py                     | 25 +++++++
 run_agent.py                                  | 30 ++++++--
 tests/agent/test_auxiliary_client.py          | 41 +++++++++++
 tests/providers/test_provider_profiles.py     |  4 ++
 .../test_provider_attribution_headers.py      | 68 +++++++++++++++++++
 5 files changed, 162 insertions(+), 6 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index cd655e70e56..1c7dd9f7497 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -369,6 +369,21 @@ def build_or_headers(or_config: dict | None = None) -> dict:
 
     return headers
 
+
+# NVIDIA NIM cloud billing attribution.  Keep this host-gated because the
+# nvidia provider also supports local/on-prem NIM endpoints via NVIDIA_BASE_URL.
+_NVIDIA_NIM_CLOUD_HEADERS = {
+    "X-BILLING-INVOKE-ORIGIN": "HermesAgent",
+}
+
+
+def build_nvidia_nim_headers(base_url: str | None) -> dict:
+    """Return NVIDIA NIM cloud attribution headers for build.nvidia.com traffic."""
+    if base_url_host_matches(str(base_url or ""), "integrate.api.nvidia.com"):
+        return dict(_NVIDIA_NIM_CLOUD_HEADERS)
+    return {}
+
+
 # Vercel AI Gateway app attribution headers. HTTP-Referer maps to
 # referrerUrl and X-Title maps to appName in the gateway's analytics.
 from hermes_cli import __version__ as _HERMES_VERSION
@@ -1372,6 +1387,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                 from hermes_cli.models import copilot_default_headers
 
                 extra["default_headers"] = copilot_default_headers()
+            elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+                extra["default_headers"] = build_nvidia_nim_headers(base_url)
             else:
                 try:
                     from providers import get_provider_profile as _gpf_aux
@@ -1407,6 +1424,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
             from hermes_cli.models import copilot_default_headers
 
             extra["default_headers"] = copilot_default_headers()
+        elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+            extra["default_headers"] = build_nvidia_nim_headers(base_url)
         else:
             try:
                 from providers import get_provider_profile as _gpf_aux2
@@ -2690,6 +2709,8 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
         )
     elif base_url_host_matches(sync_base_url, "api.kimi.com"):
         async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+    elif base_url_host_matches(sync_base_url, "integrate.api.nvidia.com"):
+        async_kwargs["default_headers"] = build_nvidia_nim_headers(sync_base_url)
     else:
         # Fall back to profile.default_headers for providers that declare
         # client-level headers on their ProviderProfile (e.g. attribution
@@ -2951,6 +2972,8 @@ def resolve_provider_client(
                 extra["default_headers"] = copilot_request_headers(
                     is_agent_turn=True, is_vision=is_vision
                 )
+            elif base_url_host_matches(custom_base, "integrate.api.nvidia.com"):
+                extra["default_headers"] = build_nvidia_nim_headers(custom_base)
             else:
                 # Fall back to profile.default_headers for providers that
                 # declare client-level attribution headers on their profile.
@@ -3149,6 +3172,8 @@ def resolve_provider_client(
             headers.update(copilot_request_headers(
                 is_agent_turn=True, is_vision=is_vision
             ))
+        elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+            headers.update(build_nvidia_nim_headers(base_url))
         else:
             # Fall back to profile.default_headers for providers that declare
             # client-level attribution headers on their profile (e.g. GMI
diff --git a/run_agent.py b/run_agent.py
index a82c6417ae1..7e42beb3eba 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1664,6 +1664,9 @@ class AIAgent:
                 if base_url_host_matches(effective_base, "openrouter.ai"):
                     from agent.auxiliary_client import build_or_headers
                     client_kwargs["default_headers"] = build_or_headers()
+                elif base_url_host_matches(effective_base, "integrate.api.nvidia.com"):
+                    from agent.auxiliary_client import build_nvidia_nim_headers
+                    client_kwargs["default_headers"] = build_nvidia_nim_headers(effective_base)
                 elif base_url_host_matches(effective_base, "api.routermint.com"):
                     client_kwargs["default_headers"] = _routermint_headers()
                 elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
@@ -1702,9 +1705,15 @@ class AIAgent:
                     }
                     if _provider_timeout is not None:
                         client_kwargs["timeout"] = _provider_timeout
-                    # Preserve any default_headers the router set
-                    if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
-                        client_kwargs["default_headers"] = dict(_routed_client._default_headers)
+                    # Preserve provider-specific headers the router set.  The
+                    # OpenAI SDK stores caller-provided default_headers in
+                    # _custom_headers; older/mocked clients may expose
+                    # _default_headers instead.
+                    _routed_headers = getattr(_routed_client, "_custom_headers", None)
+                    if not _routed_headers:
+                        _routed_headers = getattr(_routed_client, "_default_headers", None)
+                    if _routed_headers:
+                        client_kwargs["default_headers"] = dict(_routed_headers)
                 else:
                     # When the user explicitly chose a non-OpenRouter provider
                     # but no credentials were found, fail fast with a clear
@@ -1753,8 +1762,11 @@ class AIAgent:
                                 }
                                 if _provider_timeout is not None:
                                     client_kwargs["timeout"] = _provider_timeout
-                                if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers:
-                                    client_kwargs["default_headers"] = dict(_fb_client._default_headers)
+                                _fb_headers = getattr(_fb_client, "_custom_headers", None)
+                                if not _fb_headers:
+                                    _fb_headers = getattr(_fb_client, "_default_headers", None)
+                                if _fb_headers:
+                                    client_kwargs["default_headers"] = dict(_fb_headers)
                                 _fb_resolved = True
                                 break
                         if not _fb_resolved:
@@ -7334,12 +7346,18 @@ class AIAgent:
         return True
 
     def _apply_client_headers_for_base_url(self, base_url: str) -> None:
-        from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers
+        from agent.auxiliary_client import (
+            _AI_GATEWAY_HEADERS,
+            build_nvidia_nim_headers,
+            build_or_headers,
+        )
 
         if base_url_host_matches(base_url, "openrouter.ai"):
             self._client_kwargs["default_headers"] = build_or_headers()
         elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"):
             self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
+        elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+            self._client_kwargs["default_headers"] = build_nvidia_nim_headers(base_url)
         elif base_url_host_matches(base_url, "api.routermint.com"):
             self._client_kwargs["default_headers"] = _routermint_headers()
         elif base_url_host_matches(base_url, "api.githubcopilot.com"):
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index c25ca219379..9dd85762956 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -2415,10 +2415,51 @@ def _clean_env(monkeypatch):
     """Strip provider env vars so each test starts clean."""
     for key in (
         "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
+        "NVIDIA_API_KEY", "NVIDIA_BASE_URL",
     ):
         monkeypatch.delenv(key, raising=False)
 
 
+class TestNvidiaBillingHeaders:
+    """NVIDIA NIM billing-origin headers are scoped to NVIDIA cloud."""
+
+    def test_resolve_provider_client_cloud_adds_billing_origin_header(self, monkeypatch):
+        monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key")
+        monkeypatch.delenv("NVIDIA_BASE_URL", raising=False)
+        mock_openai = MagicMock()
+        mock_openai.return_value = MagicMock(name="nvidia-client")
+
+        with patch("agent.auxiliary_client.OpenAI", mock_openai):
+            client, model = resolve_provider_client(
+                provider="nvidia",
+                model="nvidia/test-model",
+            )
+
+        assert client is not None
+        assert model == "nvidia/test-model"
+        call_kwargs = mock_openai.call_args[1]
+        headers = call_kwargs["default_headers"]
+        assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+    def test_resolve_provider_client_local_nim_skips_billing_origin_header(self, monkeypatch):
+        monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key")
+        monkeypatch.setenv("NVIDIA_BASE_URL", "http://localhost:8000/v1")
+        mock_openai = MagicMock()
+        mock_openai.return_value = MagicMock(name="nvidia-local-client")
+
+        with patch("agent.auxiliary_client.OpenAI", mock_openai):
+            client, model = resolve_provider_client(
+                provider="nvidia",
+                model="nvidia/test-model",
+            )
+
+        assert client is not None
+        assert model == "nvidia/test-model"
+        call_kwargs = mock_openai.call_args[1]
+        headers = call_kwargs.get("default_headers", {})
+        assert "X-BILLING-INVOKE-ORIGIN" not in headers
+
+
 class TestOpenRouterExplicitApiKey:
     """Test that explicit_api_key is correctly propagated to _try_openrouter()."""
 
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
index c79ed2aea9b..df96a80fd80 100644
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -42,6 +42,10 @@ class TestNvidiaProfile:
         p = get_provider_profile("nvidia")
         assert "nvidia.com" in p.base_url
 
+    def test_billing_header_not_profile_wide(self):
+        p = get_provider_profile("nvidia")
+        assert p.default_headers == {}
+
 
 class TestKimiProfile:
     def test_temperature_omit(self):
diff --git a/tests/run_agent/test_provider_attribution_headers.py b/tests/run_agent/test_provider_attribution_headers.py
index 2a1d9088c46..a4ce301a857 100644
--- a/tests/run_agent/test_provider_attribution_headers.py
+++ b/tests/run_agent/test_provider_attribution_headers.py
@@ -3,6 +3,7 @@
 Mirrors the OpenRouter pattern for the Vercel AI Gateway so that
 referrerUrl / appName / User-Agent flow into gateway analytics.
 """
+from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 from run_agent import AIAgent
@@ -65,6 +66,73 @@ def test_routermint_base_url_applies_user_agent_header(mock_openai):
     assert headers["User-Agent"].startswith("HermesAgent/")
 
 
+@patch("run_agent.OpenAI")
+def test_nvidia_cloud_base_url_applies_billing_origin_header(mock_openai):
+    mock_openai.return_value = MagicMock()
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://integrate.api.nvidia.com/v1",
+        model="nvidia/test-model",
+        provider="nvidia",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+
+    assert agent._client_kwargs["default_headers"]["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+    agent._apply_client_headers_for_base_url("https://integrate.api.nvidia.com/v1")
+
+    headers = agent._client_kwargs["default_headers"]
+    assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+
+@patch("run_agent.OpenAI")
+def test_nvidia_local_base_url_does_not_apply_billing_origin_header(mock_openai):
+    mock_openai.return_value = MagicMock()
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://integrate.api.nvidia.com/v1",
+        model="nvidia/test-model",
+        provider="nvidia",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    agent._client_kwargs["default_headers"] = {
+        "X-BILLING-INVOKE-ORIGIN": "HermesAgent",
+    }
+
+    agent._apply_client_headers_for_base_url("http://localhost:8000/v1")
+
+    assert "default_headers" not in agent._client_kwargs
+
+
+@patch("run_agent.OpenAI")
+def test_routed_client_preserves_openai_sdk_custom_headers(mock_openai):
+    mock_openai.return_value = MagicMock()
+    routed_client = SimpleNamespace(
+        api_key="test-key",
+        base_url="https://integrate.api.nvidia.com/v1",
+        _custom_headers={"X-BILLING-INVOKE-ORIGIN": "HermesAgent"},
+    )
+
+    with patch("agent.auxiliary_client.resolve_provider_client", return_value=(
+        routed_client,
+        "nvidia/test-model",
+    )):
+        agent = AIAgent(
+            provider="nvidia",
+            model="nvidia/test-model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+
+    headers = agent._client_kwargs["default_headers"]
+    assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+
 @patch("run_agent.OpenAI")
 def test_gmi_base_url_picks_up_profile_user_agent(mock_openai):
     """GMI declares User-Agent on its ProviderProfile.default_headers.

From 6fc0fa6e50a2eb6307c1e5afbeff360708b734ef Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:00:13 -0700
Subject: [PATCH 056/218] chore(release): add AUTHOR_MAP entry for
 kchantharuan@nvidia.com

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index c9cd9c173c0..aafa626329e 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -755,6 +755,7 @@ AUTHOR_MAP = {
     "zhujianxyz@gmail.com": "opriz",
     "tuancanhnguyen706@gmail.com": "xxxigm",
     "asurla@nvidia.com": "anniesurla",
+    "kchantharuan@nvidia.com": "nv-kasikritc",
     "limkuan24@gmail.com": "WideLee",
     "aviralarora002@gmail.com": "AviArora02-commits",
     "draixagent@gmail.com": "draix",

From 4444d5fe4f65dcbca939a1f39ae58438205e7dad Mon Sep 17 00:00:00 2001
From: HenkDz <noonou7@gmail.com>
Date: Fri, 15 May 2026 15:26:08 +0100
Subject: [PATCH 057/218] fix(acp): emit native plan updates for todo

---
 acp_adapter/events.py    | 51 ++++++++++++++++++++++++++++++++++++++++
 tests/acp/test_events.py | 32 ++++++++++++++++++++++++-
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/acp_adapter/events.py b/acp_adapter/events.py
index f0442ca2e8f..828807c3aef 100644
--- a/acp_adapter/events.py
+++ b/acp_adapter/events.py
@@ -14,6 +14,7 @@ from collections import deque
 from typing import Any, Callable, Deque, Dict
 
 import acp
+from acp.schema import AgentPlanUpdate, PlanEntry
 
 from .tools import (
     build_tool_complete,
@@ -24,6 +25,52 @@ from .tools import (
 logger = logging.getLogger(__name__)
 
 
+def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
+    """Translate Hermes' todo tool result into ACP's native plan update.
+
+    Zed renders ``sessionUpdate: plan`` as its first-class task/todo panel. The
+    Hermes agent already maintains task state through the ``todo`` tool, so the
+    ACP adapter should expose that state natively instead of only as a generic
+    tool-call transcript block.
+    """
+    if not isinstance(result, str) or not result.strip():
+        return None
+
+    try:
+        data = json.loads(result)
+    except Exception:
+        return None
+
+    if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
+        return None
+
+    status_map = {
+        "pending": "pending",
+        "in_progress": "in_progress",
+        "completed": "completed",
+        # ACP plans only support pending/in_progress/completed. Preserve
+        # cancelled tasks as terminal entries instead of dropping them and
+        # making the client's full-list replacement lose visible context.
+        "cancelled": "completed",
+    }
+    entries: list[PlanEntry] = []
+    for item in data["todos"]:
+        if not isinstance(item, dict):
+            continue
+        content = str(item.get("content") or item.get("id") or "").strip()
+        if not content:
+            continue
+        raw_status = str(item.get("status") or "pending").strip()
+        status = status_map.get(raw_status, "pending")
+        if raw_status == "cancelled":
+            content = f"[cancelled] {content}"
+        entries.append(PlanEntry(content=content, priority="medium", status=status))
+
+    if not entries:
+        return None
+    return AgentPlanUpdate(session_update="plan", entries=entries)
+
+
 def _send_update(
     conn: acp.Client,
     session_id: str,
@@ -175,6 +222,10 @@ def make_step_cb(
                         snapshot=meta.get("snapshot"),
                     )
                     _send_update(conn, session_id, loop, update)
+                    if tool_name == "todo":
+                        plan_update = _build_plan_update_from_todo_result(result)
+                        if plan_update is not None:
+                            _send_update(conn, session_id, loop, plan_update)
                     if not queue:
                         tool_call_ids.pop(tool_name, None)
 
diff --git a/tests/acp/test_events.py b/tests/acp/test_events.py
index 56a2687226c..ebddf076dbd 100644
--- a/tests/acp/test_events.py
+++ b/tests/acp/test_events.py
@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 
 import acp
-from acp.schema import ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
+from acp.schema import AgentPlanUpdate, ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
 
 from acp_adapter.events import (
     _send_update,
@@ -296,6 +296,36 @@ class TestStepCallback:
         }
         mock_send.assert_called_once()
 
+    def test_todo_completion_emits_native_plan_update(self, mock_conn, event_loop_fixture):
+        from collections import deque
+
+        tool_call_ids = {"todo": deque(["tc-todo"])}
+        loop = event_loop_fixture
+        cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
+        todo_result = (
+            '{"todos":['
+            '{"id":"inspect","content":"Inspect ACP","status":"completed"},'
+            '{"id":"patch","content":"Patch renderer","status":"in_progress"},'
+            '{"id":"old","content":"Drop stale task","status":"cancelled"}'
+            '],"summary":{"total":3}}'
+        )
+
+        with patch("acp_adapter.events._send_update") as mock_send:
+            cb(1, [{"name": "todo", "result": todo_result}])
+
+        updates = [call.args[3] for call in mock_send.call_args_list]
+        plan_updates = [u for u in updates if getattr(u, "session_update", None) == "plan"]
+        assert len(plan_updates) == 1
+        plan = plan_updates[0]
+        assert isinstance(plan, AgentPlanUpdate)
+        assert [entry.content for entry in plan.entries] == [
+            "Inspect ACP",
+            "Patch renderer",
+            "[cancelled] Drop stale task",
+        ]
+        assert [entry.status for entry in plan.entries] == ["completed", "in_progress", "completed"]
+        assert [entry.priority for entry in plan.entries] == ["medium", "medium", "medium"]
+
 
 # ---------------------------------------------------------------------------
 # Message callback

From bd3a5873e11f084d74be876a505a406224a6ef3e Mon Sep 17 00:00:00 2001
From: HenkDz <noonou7@gmail.com>
Date: Fri, 15 May 2026 16:15:04 +0100
Subject: [PATCH 058/218] fix(acp): replay native todo plans

---
 acp_adapter/events.py    | 21 ++++++++++++----
 acp_adapter/server.py    |  8 ++++++-
 tests/acp/test_events.py | 27 +++++++++++++++++----
 tests/acp/test_server.py | 52 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/acp_adapter/events.py b/acp_adapter/events.py
index 828807c3aef..00e940b9ee0 100644
--- a/acp_adapter/events.py
+++ b/acp_adapter/events.py
@@ -25,6 +25,17 @@ from .tools import (
 logger = logging.getLogger(__name__)
 
 
+def _json_loads_maybe_prefix(value: str) -> Any:
+    """Parse a JSON object even when Hermes appended a human hint after it."""
+    text = value.strip()
+    try:
+        return json.loads(text)
+    except Exception:
+        decoder = json.JSONDecoder()
+        data, _ = decoder.raw_decode(text)
+        return data
+
+
 def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
     """Translate Hermes' todo tool result into ACP's native plan update.
 
@@ -37,13 +48,17 @@ def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
         return None
 
     try:
-        data = json.loads(result)
+        data = _json_loads_maybe_prefix(result)
     except Exception:
         return None
 
     if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
         return None
 
+    todos = data["todos"]
+    if not todos:
+        return AgentPlanUpdate(session_update="plan", entries=[])
+
     status_map = {
         "pending": "pending",
         "in_progress": "in_progress",
@@ -54,7 +69,7 @@ def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
         "cancelled": "completed",
     }
     entries: list[PlanEntry] = []
-    for item in data["todos"]:
+    for item in todos:
         if not isinstance(item, dict):
             continue
         content = str(item.get("content") or item.get("id") or "").strip()
@@ -66,8 +81,6 @@ def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
             content = f"[cancelled] {content}"
         entries.append(PlanEntry(content=content, priority="medium", status=status))
 
-    if not entries:
-        return None
     return AgentPlanUpdate(session_update="plan", entries=entries)
 
 
diff --git a/acp_adapter/server.py b/acp_adapter/server.py
index 20c4d7cdb4f..71fce1890d1 100644
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -59,6 +59,7 @@ from acp.schema import (
 
 from acp_adapter.auth import TERMINAL_SETUP_AUTH_METHOD_ID, build_auth_methods, detect_provider
 from acp_adapter.events import (
+    _build_plan_update_from_todo_result,
     make_message_cb,
     make_step_cb,
     make_thinking_cb,
@@ -910,15 +911,20 @@ class HermesACPAgent(acp.Agent):
                 if not tool_call_id or not tool_name:
                     continue
                 result = message.get("content")
+                result_text = result if isinstance(result, str) else None
                 if not await _send(
                     build_tool_complete(
                         tool_call_id,
                         tool_name,
-                        result=result if isinstance(result, str) else None,
+                        result=result_text,
                         function_args=function_args,
                     )
                 ):
                     return
+                if tool_name == "todo":
+                    plan_update = _build_plan_update_from_todo_result(result_text)
+                    if plan_update is not None and not await _send(plan_update):
+                        return
 
     async def new_session(
         self,
diff --git a/tests/acp/test_events.py b/tests/acp/test_events.py
index ebddf076dbd..ec0b32549da 100644
--- a/tests/acp/test_events.py
+++ b/tests/acp/test_events.py
@@ -12,6 +12,7 @@ import acp
 from acp.schema import AgentPlanUpdate, ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
 
 from acp_adapter.events import (
+    _build_plan_update_from_todo_result,
     _send_update,
     make_message_cb,
     make_step_cb,
@@ -296,7 +297,7 @@ class TestStepCallback:
         }
         mock_send.assert_called_once()
 
-    def test_todo_completion_emits_native_plan_update(self, mock_conn, event_loop_fixture):
+    def test_todo_completion_emits_native_plan_update_after_tool_completion(self, mock_conn, event_loop_fixture):
         from collections import deque
 
         tool_call_ids = {"todo": deque(["tc-todo"])}
@@ -314,9 +315,11 @@ class TestStepCallback:
             cb(1, [{"name": "todo", "result": todo_result}])
 
         updates = [call.args[3] for call in mock_send.call_args_list]
-        plan_updates = [u for u in updates if getattr(u, "session_update", None) == "plan"]
-        assert len(plan_updates) == 1
-        plan = plan_updates[0]
+        assert [getattr(update, "session_update", None) for update in updates] == [
+            "tool_call_update",
+            "plan",
+        ]
+        plan = updates[1]
         assert isinstance(plan, AgentPlanUpdate)
         assert [entry.content for entry in plan.entries] == [
             "Inspect ACP",
@@ -326,6 +329,22 @@ class TestStepCallback:
         assert [entry.status for entry in plan.entries] == ["completed", "in_progress", "completed"]
         assert [entry.priority for entry in plan.entries] == ["medium", "medium", "medium"]
 
+    def test_todo_plan_update_parses_json_with_trailing_hint(self):
+        result = '{"todos":[{"id":"ship","content":"Ship ACP plan","status":"pending"}]}\n\n[Hint: persisted]'
+
+        update = _build_plan_update_from_todo_result(result)
+
+        assert isinstance(update, AgentPlanUpdate)
+        assert [entry.content for entry in update.entries] == ["Ship ACP plan"]
+        assert [entry.status for entry in update.entries] == ["pending"]
+
+    def test_todo_plan_update_with_empty_todos_clears_plan(self):
+        update = _build_plan_update_from_todo_result('{"todos":[],"summary":{"total":0}}')
+
+        assert isinstance(update, AgentPlanUpdate)
+        assert update.session_update == "plan"
+        assert update.entries == []
+
 
 # ---------------------------------------------------------------------------
 # Message callback
diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py
index 6e2039d2b24..511d6e00934 100644
--- a/tests/acp/test_server.py
+++ b/tests/acp/test_server.py
@@ -12,6 +12,7 @@ from acp.agent.router import build_agent_router
 from acp.schema import (
     AgentCapabilities,
     AgentMessageChunk,
+    AgentPlanUpdate,
     AuthenticateResponse,
     AvailableCommandsUpdate,
     Implementation,
@@ -391,6 +392,57 @@ class TestSessionOps:
         assert "Search results" in tool_updates[1].content[0].content.text
         assert "cli.py:42" in tool_updates[1].content[0].content.text
 
+    @pytest.mark.asyncio
+    async def test_load_session_replays_native_plan_for_persisted_todo_tool(self, agent):
+        """Persisted todo tool results should rebuild Zed's native plan panel."""
+        mock_conn = MagicMock(spec=acp.Client)
+        mock_conn.session_update = AsyncMock()
+        agent._conn = mock_conn
+
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_todo_1",
+                        "type": "function",
+                        "function": {
+                            "name": "todo",
+                            "arguments": '{"todos":[{"id":"ship","content":"Ship it","status":"in_progress"}]}',
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_todo_1",
+                "content": '{"todos":[{"id":"ship","content":"Ship it","status":"in_progress"}]}',
+            },
+        ]
+
+        mock_conn.session_update.reset_mock()
+        resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+
+        assert isinstance(resp, LoadSessionResponse)
+        relevant_updates = [
+            update for update in (call.kwargs["update"] for call in mock_conn.session_update.await_args_list)
+            if getattr(update, "session_update", None) in {"tool_call", "tool_call_update", "plan"}
+        ]
+        assert [getattr(update, "session_update", None) for update in relevant_updates] == [
+            "tool_call",
+            "tool_call_update",
+            "plan",
+        ]
+        plan = relevant_updates[2]
+        assert isinstance(plan, AgentPlanUpdate)
+        assert [entry.content for entry in plan.entries] == ["Ship it"]
+        assert [entry.status for entry in plan.entries] == ["in_progress"]
+
     @pytest.mark.asyncio
     async def test_resume_session_replays_persisted_history_to_client(self, agent):
         mock_conn = MagicMock(spec=acp.Client)

From 622c27e55c58a0d11739a21ae29dd6d072230cf0 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:07:56 -0700
Subject: [PATCH 059/218] fix(install.ps1): restore EAP=Continue around uv
 python install, skip Store stub (#26586)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fresh Windows installs were failing on first run with:

    ⚠ uv python install error: Downloading cpython-3.11.15-windows-x86_64-none (24.5MiB)
    ✗ Installation failed: Python was not found; run without arguments
      to install from the Microsoft Store...

Two bugs compounding:

1) EAP=Stop swallows uv's stderr progress as an exception. uv writes
   download progress ("Downloading cpython-3.11.15-windows-x86_64-none
   (24.5MiB)") to stderr. With $ErrorActionPreference = "Stop" set at
   the top of the script plus 2>&1 capture, PowerShell wraps each stderr
   line as an ErrorRecord and throws on the first one — even though uv
   exits 0 and Python was installed successfully. This was previously
   fixed in commit ec1714e71 (May 8) but lost in the May 12 release
   squash (413990c94). Reapply the EAP=Continue + verify-via
   'uv python find' pattern.

2) System-python fallback invokes the Microsoft Store stub. When the uv
   paths fall through, the legacy 'python --version' check invokes
   %LOCALAPPDATA%\\Microsoft\\WindowsApps\\python.exe, a 0-byte
   reparse-point stub that prints 'Python was not found...' to stdout
   and exits non-zero. Get-Command matches it. The resulting error
   message is what the user sees as the final installer crash. Detect
   and skip the stub by checking for the \\WindowsApps\\ path
   component or a 0-byte file size before invoking python.

Also save/restore EAP defensively in the catch blocks so a throw before
the assignment can't leave EAP in 'Continue'.
---
 scripts/install.ps1 | 77 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 62 insertions(+), 15 deletions(-)

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 2cf81969beb..5ed7aa755fd 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -145,19 +145,39 @@ function Test-Python {
     # Python not found — use uv to install it (no admin needed!)
     Write-Info "Python $PythonVersion not found, installing via uv..."
     try {
+        # Temporarily relax ErrorActionPreference: uv writes download progress
+        # ("Downloading cpython-3.11.15-windows-x86_64-none (24.5MiB)") to
+        # stderr.  With $ErrorActionPreference = "Stop" (set at the top of this
+        # script) PowerShell wraps stderr lines from native commands as
+        # ErrorRecord objects when captured via 2>&1, then throws a terminating
+        # exception on the first one — even though uv exits 0 and Python was
+        # installed successfully.  Verify success via `uv python find`
+        # afterwards, which is the reliable signal regardless of exit-code
+        # semantics or stderr noise.  This fix was previously landed as
+        # commit ec1714e71 and then lost in a release squash; reapplied here.
+        $prevEAP = $ErrorActionPreference
+        $ErrorActionPreference = "Continue"
         $uvOutput = & $UvCmd python install $PythonVersion 2>&1
-        if ($LASTEXITCODE -eq 0) {
-            $pythonPath = & $UvCmd python find $PythonVersion 2>$null
-            if ($pythonPath) {
-                $ver = & $pythonPath --version 2>$null
-                Write-Success "Python installed: $ver"
-                return $true
-            }
-        } else {
+        $uvExitCode = $LASTEXITCODE
+        $ErrorActionPreference = $prevEAP
+
+        # Check if Python is now available (more reliable than exit code
+        # since uv may return non-zero due to "already installed" etc.)
+        $pythonPath = & $UvCmd python find $PythonVersion 2>$null
+        if ($pythonPath) {
+            $ver = & $pythonPath --version 2>$null
+            Write-Success "Python installed: $ver"
+            return $true
+        }
+
+        # uv ran but Python still not findable — show what happened
+        if ($uvExitCode -ne 0) {
             Write-Warn "uv python install output:"
             Write-Host $uvOutput -ForegroundColor DarkGray
         }
     } catch {
+        # Restore EAP in case the try block threw before the assignment
+        if ($prevEAP) { $ErrorActionPreference = $prevEAP }
         Write-Warn "uv python install error: $_"
     }
 
@@ -175,15 +195,42 @@ function Test-Python {
         } catch { }
     }
 
-    # Fallback: try system python
-    if (Get-Command python -ErrorAction SilentlyContinue) {
-        $sysVer = python --version 2>$null
-        if ($sysVer -match "3\.(1[0-9]|[1-9][0-9])") {
-            Write-Success "Using system Python: $sysVer"
-            return $true
+    # Fallback: try system python — but skip the Microsoft Store stub.
+    # On Windows, %LOCALAPPDATA%\Microsoft\WindowsApps\python.exe is a 0-byte
+    # reparse-point stub that prints "Python was not found; run without
+    # arguments to install from the Microsoft Store..." to stdout and exits
+    # non-zero.  Get-Command finds it; invoking it produces a confusing error
+    # that the user sees as our installer crashing.
+    $pythonCmd = Get-Command python -ErrorAction SilentlyContinue
+    if ($pythonCmd) {
+        $isStoreStub = $false
+        try {
+            $pythonSource = $pythonCmd.Source
+            if ($pythonSource -and $pythonSource -like "*\WindowsApps\*") {
+                $isStoreStub = $true
+            } else {
+                # Even outside WindowsApps, a 0-byte file is the stub
+                $item = Get-Item $pythonSource -ErrorAction SilentlyContinue
+                if ($item -and $item.Length -eq 0) { $isStoreStub = $true }
+            }
+        } catch { }
+
+        if (-not $isStoreStub) {
+            try {
+                $prevEAP2 = $ErrorActionPreference
+                $ErrorActionPreference = "Continue"
+                $sysVer = & python --version 2>&1
+                $ErrorActionPreference = $prevEAP2
+                if ($sysVer -match "Python 3\.(1[0-9]|[1-9][0-9])") {
+                    Write-Success "Using system Python: $sysVer"
+                    return $true
+                }
+            } catch {
+                if ($prevEAP2) { $ErrorActionPreference = $prevEAP2 }
+            }
         }
     }
-    
+
     Write-Err "Failed to install Python $PythonVersion"
     Write-Info "Install Python 3.11 manually, then re-run this script:"
     Write-Info "  https://www.python.org/downloads/"

From 3b9368a0c47176b449ea0254cdac31ec4d5ae925 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:27:50 -0700
Subject: [PATCH 060/218] fix(auth): point SSH OAuth users at the tunnel they
 actually need (#26592)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two loopback-redirect OAuth flows (xAI Grok, Spotify) silently fail when
Hermes runs on a remote host: the auth server redirects to
127.0.0.1:<port> on the user's laptop, not on the remote box. The
--no-browser flag only suppresses webbrowser.open() — it doesn't change
the bind address. Symptom xAI surfaces is 'Could not establish
connection. We couldn't reach your app.', followed by a 'xAI
authorization timed out waiting for the local callback' on the CLI side.

Changes
- hermes_cli/auth.py: new _print_loopback_ssh_hint() helper, called from
  _xai_oauth_loopback_login() and _spotify_login() right after they
  print the redirect URI. Silent off SSH; on SSH prints the exact
  'ssh -N -L <port>:127.0.0.1:<port>' command using the actually-bound
  port (not the hardcoded constant — the listener auto-bumps when the
  preferred port is busy), a provider-specific docs URL, and a link to
  the new shared guide.
- website/docs/guides/oauth-over-ssh.md (new): single source of truth
  for the tunnel pattern — TL;DR command, jump-box / ProxyJump variant,
  mosh+tmux+ControlMaster gotchas, troubleshooting.
- website/docs/guides/xai-grok-oauth.md: fix the two sections that
  claimed --no-browser alone was enough; link to the shared guide.
- website/docs/user-guide/features/spotify.md: expand the existing
  one-liner; link to the shared guide.
- website/sidebars.ts: register the new page.
- tests/hermes_cli/test_auth_loopback_ssh_hint.py: 7 unit tests
  covering SSH-vs-not, loopback-vs-not, malformed URIs, port echo,
  with and without provider docs URL.
---
 hermes_cli/auth.py                            |  46 ++++++
 .../hermes_cli/test_auth_loopback_ssh_hint.py |  95 ++++++++++++
 website/docs/guides/oauth-over-ssh.md         | 137 ++++++++++++++++++
 website/docs/guides/xai-grok-oauth.md         |  23 ++-
 website/docs/user-guide/features/spotify.md   |   8 +-
 website/sidebars.ts                           |   1 +
 6 files changed, 304 insertions(+), 6 deletions(-)
 create mode 100644 tests/hermes_cli/test_auth_loopback_ssh_hint.py
 create mode 100644 website/docs/guides/oauth-over-ssh.md

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index c6dce709384..6cabb61570d 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -107,6 +107,9 @@ DEFAULT_SPOTIFY_REDIRECT_URI = "http://127.0.0.1:43827/spotify/callback"
 SPOTIFY_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify"
 SPOTIFY_DASHBOARD_URL = "https://developer.spotify.com/dashboard"
 SPOTIFY_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
+
+XAI_OAUTH_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/guides/xai-grok-oauth"
+OAUTH_OVER_SSH_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/guides/oauth-over-ssh"
 DEFAULT_SPOTIFY_SCOPE = " ".join((
     "user-modify-playback-state",
     "user-read-playback-state",
@@ -2528,6 +2531,8 @@ def login_spotify_command(args) -> None:
     print(f"Full setup guide: {SPOTIFY_DOCS_URL}")
     print()
 
+    _print_loopback_ssh_hint(redirect_uri, docs_url=SPOTIFY_DOCS_URL)
+
     if open_browser and not _is_remote_session():
         try:
             opened = webbrowser.open(authorize_url)
@@ -2584,6 +2589,45 @@ def _is_remote_session() -> bool:
     return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY"))
 
 
+def _print_loopback_ssh_hint(redirect_uri: str, *, docs_url: str | None = None) -> None:
+    """Print an SSH tunnel hint when running a loopback-redirect OAuth flow on a
+    remote host. The auth server (xAI, Spotify, ...) will redirect the user's
+    browser to ``127.0.0.1:<port>/callback``. If the browser is on a different
+    machine than the loopback listener (the usual SSH case), the redirect can't
+    reach the listener without a local port forward.
+
+    The hint is best-effort: silent if we don't think we're remote, or if we
+    can't parse a host/port out of the redirect URI.
+
+    Pass ``docs_url`` for a provider-specific guide (e.g. the xAI Grok OAuth
+    page); the generic OAuth-over-SSH guide is always shown after it.
+    """
+    if not _is_remote_session():
+        return
+    try:
+        parsed = urlparse(redirect_uri)
+    except Exception:
+        return
+    host = parsed.hostname or ""
+    port = parsed.port
+    if host not in ("127.0.0.1", "::1", "localhost") or not port:
+        return
+    print()
+    print("Remote session detected. Your browser will redirect to")
+    print(f"  {redirect_uri}")
+    print("which the loopback listener on THIS machine is waiting on. If your")
+    print("browser is on a different machine, forward the port first from your")
+    print("local machine in a separate terminal:")
+    print()
+    print(f"  ssh -N -L {port}:127.0.0.1:{port} <user>@<this-host>")
+    print()
+    print("Then open the authorize URL above in your local browser.")
+    if docs_url:
+        print(f"Provider docs:      {docs_url}")
+    print(f"SSH/jump-box guide: {OAUTH_OVER_SSH_DOCS_URL}")
+    print()
+
+
 # =============================================================================
 # OpenAI Codex auth — tokens stored in ~/.hermes/auth.json (not ~/.codex/)
 #
@@ -5297,6 +5341,8 @@ def _xai_oauth_loopback_login(
         print()
         print(f"Waiting for callback on {redirect_uri}")
 
+        _print_loopback_ssh_hint(redirect_uri, docs_url=XAI_OAUTH_DOCS_URL)
+
         if open_browser and not _is_remote_session():
             try:
                 opened = webbrowser.open(authorize_url)
diff --git a/tests/hermes_cli/test_auth_loopback_ssh_hint.py b/tests/hermes_cli/test_auth_loopback_ssh_hint.py
new file mode 100644
index 00000000000..fb88a6bf4ce
--- /dev/null
+++ b/tests/hermes_cli/test_auth_loopback_ssh_hint.py
@@ -0,0 +1,95 @@
+"""Unit tests for _print_loopback_ssh_hint() in hermes_cli/auth.py.
+
+The helper exists to warn users that loopback OAuth flows (xAI Grok OAuth,
+Spotify) don't work over SSH unless they set up an `ssh -L` port forward
+between their laptop's browser and the remote host's loopback listener.
+"""
+
+from __future__ import annotations
+
+import io
+import contextlib
+
+import pytest
+
+from hermes_cli import auth as auth_mod
+
+
+def _cap(fn):
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        fn()
+    return buf.getvalue()
+
+
+def test_loopback_ssh_hint_silent_when_not_remote(monkeypatch):
+    monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: False)
+    out = _cap(lambda: auth_mod._print_loopback_ssh_hint(
+        "http://127.0.0.1:56121/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL
+    ))
+    assert out == ""
+
+
+def test_loopback_ssh_hint_prints_tunnel_command_on_ssh(monkeypatch):
+    monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True)
+    out = _cap(lambda: auth_mod._print_loopback_ssh_hint(
+        "http://127.0.0.1:56121/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL
+    ))
+    # Must include the exact ssh -L command with the port from the redirect URI
+    assert "ssh -N -L 56121:127.0.0.1:56121" in out
+    # Must include the provider-specific docs URL
+    assert auth_mod.XAI_OAUTH_DOCS_URL in out
+    # Must always include the cross-provider SSH guide
+    assert auth_mod.OAUTH_OVER_SSH_DOCS_URL in out
+
+
+def test_loopback_ssh_hint_uses_actual_bound_port(monkeypatch):
+    """When the preferred port is busy, _xai_start_callback_server falls back to
+    an OS-assigned port. The hint must echo whichever port actually got bound,
+    not the hardcoded constant."""
+    monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True)
+    out = _cap(lambda: auth_mod._print_loopback_ssh_hint(
+        "http://127.0.0.1:51234/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL
+    ))
+    assert "ssh -N -L 51234:127.0.0.1:51234" in out
+    assert "56121" not in out
+
+
+def test_loopback_ssh_hint_silent_for_non_loopback_uri(monkeypatch):
+    """Defense in depth: if a future caller passes a non-loopback redirect URI
+    by mistake, we don't tell the user to forward an external port."""
+    monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True)
+    out = _cap(lambda: auth_mod._print_loopback_ssh_hint(
+        "https://example.com/callback", docs_url=auth_mod.XAI_OAUTH_DOCS_URL
+    ))
+    assert out == ""
+
+
+def test_loopback_ssh_hint_silent_for_malformed_uri(monkeypatch):
+    monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True)
+    out = _cap(lambda: auth_mod._print_loopback_ssh_hint(
+        "not-a-uri", docs_url=auth_mod.XAI_OAUTH_DOCS_URL
+    ))
+    assert out == ""
+
+
+def test_loopback_ssh_hint_works_without_provider_docs_url(monkeypatch):
+    monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True)
+    out = _cap(lambda: auth_mod._print_loopback_ssh_hint(
+        "http://127.0.0.1:43827/spotify/callback"
+    ))
+    assert "ssh -N -L 43827:127.0.0.1:43827" in out
+    # Generic SSH guide is always present even without a provider-specific URL
+    assert auth_mod.OAUTH_OVER_SSH_DOCS_URL in out
+    # Should not falsely show "Provider docs:" when no docs_url was passed
+    assert "Provider docs:" not in out
+
+
+def test_loopback_ssh_hint_accepts_localhost_hostname(monkeypatch):
+    """The constant is 127.0.0.1, but parsing tolerates `localhost` too in case
+    a future caller normalizes the URI differently."""
+    monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True)
+    out = _cap(lambda: auth_mod._print_loopback_ssh_hint(
+        "http://localhost:56121/callback"
+    ))
+    assert "ssh -N -L 56121:127.0.0.1:56121" in out
diff --git a/website/docs/guides/oauth-over-ssh.md b/website/docs/guides/oauth-over-ssh.md
new file mode 100644
index 00000000000..46a818a7934
--- /dev/null
+++ b/website/docs/guides/oauth-over-ssh.md
@@ -0,0 +1,137 @@
+---
+sidebar_position: 17
+title: "OAuth over SSH / Remote Hosts"
+description: "How to complete browser-based OAuth (xAI, Spotify) when Hermes runs on a remote machine, container, or behind a jump box"
+---
+
+# OAuth over SSH / Remote Hosts
+
+Some Hermes providers — currently **xAI Grok OAuth** and **Spotify** — use a *loopback redirect* OAuth flow. The auth server (xAI, Spotify) redirects your browser to `http://127.0.0.1:<port>/callback` so a tiny HTTP listener started by the `hermes auth ...` command can grab the authorization code.
+
+This works perfectly when Hermes and your browser are on the same machine. It breaks the moment they aren't: your laptop's browser tries to reach `127.0.0.1` on **your laptop**, but the listener is bound to `127.0.0.1` on **the remote server**.
+
+The fix is a one-line SSH local-forward.
+
+## TL;DR
+
+```bash
+# On your local machine (laptop), in a separate terminal:
+ssh -N -L 56121:127.0.0.1:56121 user@remote-host
+
+# In your existing SSH session on the remote machine:
+hermes auth add xai-oauth --no-browser
+# → Hermes prints an authorize URL. Open it in a browser on your laptop.
+# → Your browser redirects to 127.0.0.1:56121/callback, the tunnel forwards
+#   the request to the remote listener, login completes.
+```
+
+Port `56121` is what xAI OAuth uses. For Spotify, replace it with `43827`. Hermes prints the exact port it bound to on the `Waiting for callback on ...` line — copy it from there.
+
+## Which Providers Need This
+
+| Provider | Loopback port | Tunnel needed? |
+|----------|---------------|----------------|
+| `xai-oauth` (Grok SuperGrok) | `56121` | Yes, when Hermes is remote |
+| Spotify | `43827` | Yes, when Hermes is remote |
+| `anthropic` (Claude Pro/Max) | n/a | No — paste-the-code flow |
+| `openai-codex` (ChatGPT Plus/Pro) | n/a | No — device code flow |
+| `minimax`, `nous-portal` | n/a | No — device code flow |
+
+If your provider isn't in the table, you don't need a tunnel.
+
+## Why the listener can't just bind 0.0.0.0
+
+xAI and Spotify both validate the `redirect_uri` parameter against an allowlist. Both require the loopback form (`http://127.0.0.1:<exact-port>/callback`). Binding the listener to `0.0.0.0` or a different port would cause the auth server to reject the request as a redirect_uri mismatch. The SSH tunnel keeps the loopback URI intact end-to-end.
+
+## Step-by-step: single SSH hop
+
+### 1. Start the tunnel from your local machine
+
+```bash
+# xAI Grok OAuth (port 56121)
+ssh -N -L 56121:127.0.0.1:56121 user@remote-host
+
+# Or for Spotify (port 43827)
+ssh -N -L 43827:127.0.0.1:43827 user@remote-host
+```
+
+`-N` means "don't open a remote shell, just hold the tunnel open." Keep this terminal running for the duration of the login.
+
+### 2. In a separate SSH session, run the auth command
+
+```bash
+ssh user@remote-host
+hermes auth add xai-oauth --no-browser
+# or for Spotify:
+# hermes auth add spotify --no-browser
+```
+
+Hermes detects the SSH session, skips the browser auto-open, and prints an authorize URL plus a `Waiting for callback on http://127.0.0.1:<port>/callback` line.
+
+### 3. Open the URL in your local browser
+
+Copy the authorize URL from the remote terminal and paste it into the browser on your laptop. Approve the consent screen. The auth server redirects to `http://127.0.0.1:<port>/callback`. Your browser hits the tunnel, the request is forwarded to the remote listener, and Hermes prints `Login successful!`.
+
+You can tear down the tunnel (Ctrl+C in the first terminal) once you see the success line.
+
+## Step-by-step: through a jump box
+
+If you reach Hermes through a bastion / jump host, use SSH's built-in `-J` (ProxyJump):
+
+```bash
+ssh -N -L 56121:127.0.0.1:56121 -J jump-user@jump-host user@final-host
+```
+
+This chains a SSH connection through the jump host without putting the loopback port on the jump box itself. The local `127.0.0.1:56121` on your laptop tunnels straight through to `127.0.0.1:56121` on the final remote host.
+
+For older OpenSSH that doesn't support `-J`, the long form is:
+
+```bash
+ssh -N \
+    -o "ProxyCommand=ssh -W %h:%p jump-user@jump-host" \
+    -L 56121:127.0.0.1:56121 \
+    user@final-host
+```
+
+## Mosh, tmux, ssh ControlMaster
+
+The tunnel is a property of the underlying SSH connection. If you're running Hermes inside `tmux` over a mosh session, the mosh roaming doesn't carry the `-L` forwarding. Open a *separate* plain SSH session **only** for the `-L` tunnel — that's the connection that has to stay alive during the auth flow. Your interactive mosh/tmux session can keep running Hermes normally.
+
+If you use `ssh -o ControlMaster=auto`, port forwards on a multiplexed connection share the master's lifetime. Restart the master if the tunnel doesn't come up:
+
+```bash
+ssh -O exit user@remote-host
+ssh -N -L 56121:127.0.0.1:56121 user@remote-host
+```
+
+## Troubleshooting
+
+### `bind [127.0.0.1]:56121: Address already in use`
+
+Something on your laptop is already using that port. Either the previous tunnel didn't shut down cleanly, or a local Hermes is also listening on it. Find and kill the offender:
+
+```bash
+# macOS / Linux
+lsof -iTCP:56121 -sTCP:LISTEN
+kill <PID>
+```
+
+Then retry the `ssh -L` command.
+
+### "Could not establish connection. We couldn't reach your app." (xAI)
+
+xAI's authorize page shows this when its redirect to `127.0.0.1:<port>/callback` doesn't reach a listener. Either the tunnel isn't running, the port is wrong, or you're using the port Hermes printed in a previous run (the port can be auto-bumped if the preferred one is busy — always read the latest `Waiting for callback on ...` line).
+
+### `xAI authorization timed out waiting for the local callback`
+
+Same root cause as above — the redirect never made it back. Check the tunnel is still alive (`ssh -N` doesn't show output, so look at the terminal you started it from), restart it if needed, and re-run `hermes auth add xai-oauth --no-browser`.
+
+### Tokens land in the wrong `~/.hermes`
+
+The tokens are written under the Linux user that ran `hermes auth add ...`. If your gateway / systemd service runs as a different user (e.g. `root` or a dedicated `hermes` user), authenticate as **that** user so the tokens land in their `~/.hermes/auth.json`. `sudo -u hermes -i` or equivalent.
+
+## See Also
+
+- [xAI Grok OAuth](./xai-grok-oauth.md)
+- [Spotify (`Running over SSH`)](../user-guide/features/spotify.md#running-over-ssh--in-a-headless-environment)
+- [SSH `-J` / ProxyJump (man page)](https://man.openbsd.org/ssh#J)
diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md
index 5afccb6d881..95167a2430c 100644
--- a/website/docs/guides/xai-grok-oauth.md
+++ b/website/docs/guides/xai-grok-oauth.md
@@ -59,14 +59,23 @@ hermes auth add xai-oauth
 
 ### Remote / headless sessions
 
-On servers, containers, or SSH sessions where no browser is available, Hermes detects the remote environment and prints the authorization URL instead of opening a browser. Open the URL on any device with a browser, complete the consent flow, and Hermes finishes the loopback exchange when the redirect comes back.
+On servers, containers, or SSH sessions where no browser is available, Hermes detects the remote environment and prints the authorization URL instead of opening a browser.
 
-If you need to force this behaviour explicitly:
+**Important:** the loopback listener still runs on the remote machine at `127.0.0.1:56121`. The xAI redirect needs to reach *that* listener, so opening the URL on your laptop will fail (`Could not establish connection. We couldn't reach your app.`) unless you forward the port:
 
 ```bash
+# In a separate terminal on your local machine:
+ssh -N -L 56121:127.0.0.1:56121 user@remote-host
+
+# Then in your SSH session on the remote machine:
 hermes auth add xai-oauth --no-browser
+# Open the printed authorize URL in your local browser.
 ```
 
+Through a jump box / bastion: add `-J jump-user@jump-host`.
+
+See [OAuth over SSH / Remote Hosts](./oauth-over-ssh.md) for the full step-by-step, including ProxyJump chains, mosh/tmux, and ControlMaster gotchas.
+
 ## How the Login Works
 
 1. Hermes opens your browser to `accounts.x.ai`.
@@ -182,14 +191,18 @@ Hermes detected that the `state` value returned by the authorization server does
 
 ### Logging in from a remote server
 
-On SSH or container sessions Hermes prints the authorization URL instead of opening a browser. Open the URL on any device with a browser and complete the consent there — the loopback callback comes back to your remote host.
-
-You can also force this behaviour:
+On SSH or container sessions Hermes prints the authorization URL instead of opening a browser. The loopback callback listener still binds `127.0.0.1:56121` on the remote host — your laptop's browser can't reach it without an SSH local-forward:
 
 ```bash
+# Local machine, separate terminal:
+ssh -N -L 56121:127.0.0.1:56121 user@remote-host
+
+# Remote machine:
 hermes auth add xai-oauth --no-browser
 ```
 
+Full walkthrough (jump boxes, mosh/tmux, port conflicts): [OAuth over SSH / Remote Hosts](./oauth-over-ssh.md).
+
 ### "No xAI credentials found" error at runtime
 
 The auth store has no `xai-oauth` entry and no `XAI_API_KEY` is set. You haven't logged in yet, or the credential file was deleted.
diff --git a/website/docs/user-guide/features/spotify.md b/website/docs/user-guide/features/spotify.md
index bf9d652b318..5e57688e48f 100644
--- a/website/docs/user-guide/features/spotify.md
+++ b/website/docs/user-guide/features/spotify.md
@@ -68,7 +68,13 @@ Agree to the terms and click **Save**. On the next page click **Settings** → c
 
 ### Running over SSH / in a headless environment
 
-If `SSH_CLIENT` or `SSH_TTY` is set, Hermes skips the automatic browser open during both the wizard and the OAuth step. Copy the dashboard URL and the authorization URL Hermes prints, open them in a browser on your local machine, and proceed normally — the local HTTP listener still runs on the remote host on port 43827. If you need to reach it through an SSH tunnel, forward that port: `ssh -L 43827:127.0.0.1:43827 remote`.
+If `SSH_CLIENT` or `SSH_TTY` is set, Hermes skips the automatic browser open during both the wizard and the OAuth step. Copy the dashboard URL and the authorization URL Hermes prints, open them in a browser on your local machine, and proceed normally — the local HTTP listener still runs on the remote host on port `43827`. Your laptop's browser can't reach the remote loopback without an SSH local-forward:
+
+```bash
+ssh -N -L 43827:127.0.0.1:43827 user@remote-host
+```
+
+For jump-box / bastion setups and other gotchas (mosh, tmux, port conflicts), see [OAuth over SSH / Remote Hosts](../../guides/oauth-over-ssh.md).
 
 ## Verify
 
diff --git a/website/sidebars.ts b/website/sidebars.ts
index a0fb24b8c50..f0a0658c3bf 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -192,6 +192,7 @@ const sidebars: SidebarsConfig = {
         'guides/aws-bedrock',
         'guides/azure-foundry',
         'guides/xai-grok-oauth',
+        'guides/oauth-over-ssh',
         'guides/microsoft-graph-app-registration',
         'guides/operate-teams-meeting-pipeline',
       ],

From 518f39557b6753a5dc766a05dd14dd5cf2b9edeb Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:32:14 -0700
Subject: [PATCH 061/218] fix(gateway): keep running when platforms fail; add
 per-platform circuit breaker + /platform (#26600)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stop the gateway from exiting (or systemd-restart-looping) when a single
messaging adapter fails at startup or runtime.  A misconfigured WhatsApp
(npm install timeout, unpaired bridge, missing creds.json) used to take
the entire gateway down, killing cron jobs and any other connected
platforms with it.

Changes:

  • Startup (gateway/run.py): when connected_count==0 but the only
    errors are retryable, log a degraded-state warning and keep the
    gateway alive instead of returning False.  Reconnect watcher then
    recovers platforms as their underlying problem clears.

  • Runtime (gateway/run.py _handle_adapter_fatal_error): when the last
    adapter goes down with a retryable error and is queued for
    reconnection, stay alive instead of exit-with-failure.  Previously
    this triggered systemd Restart=on-failure, which created infinite
    restart loops on persistent retryable failures (proxy outage,
    repeated bridge crashes).

  • Reconnect watcher (gateway/run.py _platform_reconnect_watcher):
    replace the 20-attempt hard drop with a circuit-breaker pause.
    After _PAUSE_AFTER_FAILURES (10) consecutive retryable failures, the
    platform stays in _failed_platforms with paused=True so the watcher
    skips it but the operator can still see and resume it.  Non-retryable
    errors still drop out of the queue immediately.  Resolves #17063
    (gateway giving up on Telegram after 20 attempts).

  • WhatsApp preflight (gateway/platforms/whatsapp.py): refuse to start
    the Node bridge when creds.json is missing.  Sets a non-retryable
    whatsapp_not_paired fatal error so the watcher drops it cleanly
    with a single 'run hermes whatsapp' log line instead of paying the
    30s bridge bootstrap timeout on every gateway start.

  • WhatsApp setup ordering (hermes_cli/main.py cmd_whatsapp): only set
    WHATSAPP_ENABLED=true once pairing actually succeeds.  Previously
    the wizard wrote the env var at step 2 (before npm install and QR
    pairing), so any Ctrl+C left .env claiming WhatsApp was ready when
    the bridge had no creds.json.  Also propagate the env var when the
    user keeps an existing pairing on a re-run.

  • /platform slash command (hermes_cli/commands.py + gateway/run.py):
    new gateway-only command for manual circuit-breaker control.
      /platform list           — show connected + failed/paused platforms
      /platform pause <name>   — silence a known-broken platform
      /platform resume <name>  — re-queue a paused platform

Tests:

  • New: pause/resume helpers, /platform list|pause|resume command,
    WhatsApp creds.json preflight, WhatsApp setup ordering.
  • Updated: stale assertions that codified the old 'exit and let
    systemd restart' behavior in test_runner_fatal_adapter.py,
    test_runner_startup_failures.py, and test_platform_reconnect.py
    (the 20-attempt give-up test became a circuit-breaker pause test).

5488 tests pass in tests/gateway/.
---
 gateway/platforms/whatsapp.py                 |  34 ++-
 gateway/run.py                                | 255 +++++++++++++++---
 hermes_cli/commands.py                        |   2 +
 hermes_cli/main.py                            |  27 +-
 tests/gateway/test_platform_reconnect.py      | 230 +++++++++++++++-
 tests/gateway/test_runner_fatal_adapter.py    |  12 +-
 tests/gateway/test_runner_startup_failures.py |  17 +-
 tests/gateway/test_whatsapp_connect.py        |  90 +++++++
 .../test_whatsapp_setup_ordering.py           | 140 ++++++++++
 9 files changed, 745 insertions(+), 62 deletions(-)
 create mode 100644 tests/hermes_cli/test_whatsapp_setup_ordering.py

diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
index 5239df3b5ae..0ca3d41fabb 100644
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@@ -493,13 +493,45 @@ class WhatsAppAdapter(BasePlatformAdapter):
         """
         if not check_whatsapp_requirements():
             logger.warning("[%s] Node.js not found. WhatsApp requires Node.js.", self.name)
+            self._set_fatal_error(
+                "whatsapp_node_missing",
+                "Node.js is not installed — install Node.js and re-run `hermes gateway`.",
+                retryable=False,
+            )
             return False
         
         bridge_path = Path(self._bridge_script)
         if not bridge_path.exists():
             logger.warning("[%s] Bridge script not found: %s", self.name, bridge_path)
+            self._set_fatal_error(
+                "whatsapp_bridge_missing",
+                f"WhatsApp bridge script missing at {bridge_path}.",
+                retryable=False,
+            )
             return False
-        
+
+        # Pre-flight: skip the 30s bridge bootstrap entirely if the user
+        # never finished pairing.  Without creds.json the bridge prints
+        # QR codes to its log file and never reaches status:connected,
+        # so every gateway restart paid the 30s timeout + queued WhatsApp
+        # for indefinite retries.  Mark non-retryable so the user gets a
+        # clear "run hermes whatsapp" message instead of the watcher
+        # silently hammering an unconfigured platform.
+        creds_path = self._session_path / "creds.json"
+        if not creds_path.exists():
+            logger.warning(
+                "[%s] WhatsApp is enabled but not paired (no creds.json at %s). "
+                "Run `hermes whatsapp` to pair, or remove WHATSAPP_ENABLED from "
+                "your .env to disable.",
+                self.name, creds_path,
+            )
+            self._set_fatal_error(
+                "whatsapp_not_paired",
+                "WhatsApp enabled but not paired — run `hermes whatsapp` to pair.",
+                retryable=False,
+            )
+            return False
+
         logger.info("[%s] Bridge found at %s", self.name, bridge_path)
         
         # Acquire scoped lock to prevent duplicate sessions
diff --git a/gateway/run.py b/gateway/run.py
index f41357673f7..f9a282a413f 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1990,21 +1990,21 @@ class GatewayRunner:
             await self.stop()
         elif not self.adapters and self._failed_platforms:
             # All platforms are down and queued for background reconnection.
-            # If the error is retryable, exit with failure so systemd Restart=on-failure
-            # can restart the process. Otherwise stay alive and keep retrying in background.
-            if adapter.fatal_error_retryable:
-                self._exit_reason = adapter.fatal_error_message or "All messaging platforms failed with retryable errors"
-                self._exit_with_failure = True
-                logger.error(
-                    "All messaging platforms failed with retryable errors. "
-                    "Shutting down gateway for service restart (systemd will retry)."
-                )
-                await self.stop()
-            else:
-                logger.warning(
-                    "No connected messaging platforms remain, but %d platform(s) queued for reconnection",
-                    len(self._failed_platforms),
-                )
+            # Keep the gateway alive so:
+            #   • cron jobs still run
+            #   • the reconnect watcher can recover platforms when the
+            #     underlying problem clears (proxy comes back, user runs
+            #     `hermes whatsapp`, etc.)
+            # We used to exit-with-failure here to trigger systemd restart,
+            # but that converted a transient outage into a restart loop and
+            # killed in-process state every time. The reconnect watcher
+            # already handles long-running recovery — let it do its job.
+            logger.warning(
+                "No connected messaging platforms remain, but %d platform(s) "
+                "queued for reconnection — gateway staying alive, watcher will "
+                "retry in background.",
+                len(self._failed_platforms),
+            )
 
     def _request_clean_exit(self, reason: str) -> None:
         self._exit_cleanly = True
@@ -2180,6 +2180,73 @@ class GatewayRunner:
         except Exception:
             pass
 
+    # ------------------------------------------------------------------
+    # Per-platform circuit breaker (pause/resume) — used by the reconnect
+    # watcher when a retryable failure recurs past a threshold, and by the
+    # /platform pause|resume slash command for manual control.
+    # ------------------------------------------------------------------
+    def _pause_failed_platform(self, platform, *, reason: str = "") -> None:
+        """Mark a queued platform as paused — keep it in ``_failed_platforms``
+        but stop the reconnect watcher from hammering it.
+
+        Used by the circuit breaker after ``_PAUSE_AFTER_FAILURES`` consecutive
+        retryable failures, and by ``/platform pause <name>`` for manual
+        intervention.  Paused platforms are surfaced in ``/platform list``
+        and resumed with ``/platform resume <name>``.
+        """
+        info = getattr(self, "_failed_platforms", {}).get(platform)
+        if info is None:
+            return
+        if info.get("paused"):
+            return
+        info["paused"] = True
+        info["pause_reason"] = reason or "auto-paused after repeated failures"
+        # Push next_retry far enough out that even if "paused" is missed
+        # by a stale code path, the watcher won't fire on it.
+        info["next_retry"] = float("inf")
+        try:
+            self._update_platform_runtime_status(
+                platform.value,
+                platform_state="paused",
+                error_code=None,
+                error_message=info["pause_reason"],
+            )
+        except Exception:
+            pass
+        logger.warning(
+            "%s paused after %d consecutive failures (%s) — "
+            "fix the underlying issue then run `/platform resume %s` "
+            "to retry, or `hermes gateway restart` to restart the gateway.",
+            platform.value, info.get("attempts", 0),
+            info["pause_reason"], platform.value,
+        )
+
+    def _resume_paused_platform(self, platform) -> bool:
+        """Unpause a platform — reset its attempt counter and schedule an
+        immediate retry.  Returns True if the platform was paused and is
+        now queued; False if it wasn't paused (or wasn't in the queue).
+        """
+        info = getattr(self, "_failed_platforms", {}).get(platform)
+        if info is None:
+            return False
+        if not info.get("paused"):
+            return False
+        info["paused"] = False
+        info.pop("pause_reason", None)
+        info["attempts"] = 0
+        info["next_retry"] = time.monotonic()  # retry on next watcher tick
+        try:
+            self._update_platform_runtime_status(
+                platform.value,
+                platform_state="retrying",
+                error_code=None,
+                error_message=None,
+            )
+        except Exception:
+            pass
+        logger.info("%s resumed — retrying on next watcher tick", platform.value)
+        return True
+
     @staticmethod
     def _load_prefill_messages() -> List[Dict[str, Any]]:
         """Load ephemeral prefill messages from config or env var.
@@ -3613,16 +3680,32 @@ class GatewayRunner:
                 return True
             if enabled_platform_count > 0:
                 if startup_retryable_errors:
-                    # At least one platform attempted a connection and failed —
-                    # this is a real startup error that should block the gateway.
+                    # All enabled platforms hit retryable failures (network
+                    # blip, bridge not paired, npm install timeout, etc.).
+                    # Keep the gateway alive so:
+                    #   • cron jobs still run
+                    #   • the reconnect watcher gets a chance to recover the
+                    #     failing platforms once the underlying problem is
+                    #     fixed (e.g. user runs `hermes whatsapp`, fixes
+                    #     proxy, etc.)
+                    # Exiting here used to convert a single misconfigured
+                    # platform into an infinite systemd restart loop.
                     reason = "; ".join(startup_retryable_errors)
-                    logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
+                    logger.warning(
+                        "Gateway started with no connected platforms — "
+                        "%d platform(s) queued for retry: %s",
+                        len(self._failed_platforms), reason,
+                    )
                     try:
                         from gateway.status import write_runtime_status
-                        write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
+                        write_runtime_status(
+                            gateway_state="degraded",
+                            exit_reason=None,
+                        )
                     except Exception:
                         pass
-                    return False
+                    # Fall through to the normal "running" state — reconnect
+                    # watcher takes it from here.
                 # All enabled platforms had no adapter (missing library or credentials).
                 # In fleet deployments the same config.yaml is shared across nodes that
                 # may only have credentials for a subset of platforms.  Rather than
@@ -4737,11 +4820,15 @@ class GatewayRunner:
         """Background task that periodically retries connecting failed platforms.
 
         Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
-        Stops retrying a platform after 20 failed attempts or if the error
-        is non-retryable (e.g. bad auth token).
+        Retryable failures keep retrying at the backoff cap indefinitely
+        — but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row
+        without ever succeeding, it is *paused*: kept in the retry queue
+        but no longer hammered.  The user surfaces it with ``/platform list``
+        and resumes it with ``/platform resume <name>``.  Non-retryable
+        failures (bad auth, etc.) still drop out of the queue immediately.
         """
-        _MAX_ATTEMPTS = 20
         _BACKOFF_CAP = 300  # 5 minutes max between retries
+        _PAUSE_AFTER_FAILURES = 10  # circuit-breaker threshold
 
         await asyncio.sleep(10)  # initial delay — let startup finish
         while self._running:
@@ -4758,22 +4845,18 @@ class GatewayRunner:
                 if not self._running:
                     return
                 info = self._failed_platforms[platform]
+                # Skip paused platforms entirely — they need explicit
+                # /platform resume to come back.
+                if info.get("paused"):
+                    continue
                 if now < info["next_retry"]:
                     continue  # not time yet
 
-                if info["attempts"] >= _MAX_ATTEMPTS:
-                    logger.warning(
-                        "Giving up reconnecting %s after %d attempts",
-                        platform.value, info["attempts"],
-                    )
-                    del self._failed_platforms[platform]
-                    continue
-
                 platform_config = info["config"]
                 attempt = info["attempts"] + 1
                 logger.info(
-                    "Reconnecting %s (attempt %d/%d)...",
-                    platform.value, attempt, _MAX_ATTEMPTS,
+                    "Reconnecting %s (attempt %d)...",
+                    platform.value, attempt,
                 )
 
                 try:
@@ -4838,6 +4921,14 @@ class GatewayRunner:
                             "Reconnect %s failed, next retry in %ds",
                             platform.value, backoff,
                         )
+                        if attempt >= _PAUSE_AFTER_FAILURES:
+                            self._pause_failed_platform(
+                                platform,
+                                reason=(
+                                    adapter.fatal_error_message
+                                    or "failed to reconnect"
+                                ),
+                            )
                 except Exception as e:
                     self._update_platform_runtime_status(
                         platform.value,
@@ -4852,6 +4943,8 @@ class GatewayRunner:
                         "Reconnect %s error: %s, next retry in %ds",
                         platform.value, e, backoff,
                     )
+                    if attempt >= _PAUSE_AFTER_FAILURES:
+                        self._pause_failed_platform(platform, reason=str(e))
 
             # Check every 10 seconds for platforms that need reconnection
             for _ in range(10):
@@ -6451,6 +6544,9 @@ class GatewayRunner:
         if canonical == "agents":
             return await self._handle_agents_command(event)
 
+        if canonical == "platform":
+            return await self._handle_platform_command(event)
+
         if canonical == "restart":
             return await self._handle_restart_command(event)
         
@@ -8698,6 +8794,99 @@ class GatewayRunner:
         else:
             return t("gateway.stop.no_active")
 
+    async def _handle_platform_command(self, event: MessageEvent) -> str:
+        """Handle ``/platform list|pause|resume [name]`` — surface and
+        manually control failed/paused gateway adapters.
+
+        Examples:
+            ``/platform list``           — show connected + failed/paused platforms
+            ``/platform pause whatsapp`` — stop the reconnect watcher hammering whatsapp
+            ``/platform resume whatsapp`` — re-queue a paused platform for retry
+        """
+        text = (getattr(event, "content", "") or "").strip()
+        # Strip the leading "/platform" (or "/PLATFORM") token if present
+        parts = text.split(maxsplit=2)
+        if parts and parts[0].lower().lstrip("/").startswith("platform"):
+            parts = parts[1:]
+        action = (parts[0] if parts else "list").lower()
+        target = parts[1].lower() if len(parts) > 1 else ""
+
+        # Resolve platform name (case-insensitive, value match)
+        def _resolve_platform(name: str):
+            if not name:
+                return None
+            for p in Platform.__members__.values():
+                if p.value.lower() == name:
+                    return p
+            return None
+
+        if action == "list":
+            lines = ["**Gateway platforms**"]
+            connected = sorted(p.value for p in self.adapters.keys())
+            if connected:
+                lines.append("Connected: " + ", ".join(connected))
+            else:
+                lines.append("Connected: (none)")
+            failed = getattr(self, "_failed_platforms", {}) or {}
+            if failed:
+                for p, info in failed.items():
+                    if info.get("paused"):
+                        reason = info.get("pause_reason") or "paused"
+                        lines.append(
+                            f"  · {p.value} — PAUSED ({reason}). "
+                            f"Resume with `/platform resume {p.value}`."
+                        )
+                    else:
+                        attempts = info.get("attempts", 0)
+                        lines.append(
+                            f"  · {p.value} — retrying (attempt {attempts})"
+                        )
+            else:
+                lines.append("Failed/paused: (none)")
+            return "\n".join(lines)
+
+        if action in ("pause", "resume"):
+            if not target:
+                return f"Usage: /platform {action} <name>"
+            platform = _resolve_platform(target)
+            if platform is None:
+                return f"Unknown platform: {target}"
+            failed = getattr(self, "_failed_platforms", {}) or {}
+            if action == "pause":
+                if platform not in failed:
+                    return (
+                        f"{platform.value} is not in the retry queue "
+                        f"(it's either connected or not enabled)."
+                    )
+                if failed[platform].get("paused"):
+                    return f"{platform.value} is already paused."
+                self._pause_failed_platform(platform, reason="paused via /platform pause")
+                return (
+                    f"✓ {platform.value} paused. "
+                    f"Resume with `/platform resume {platform.value}` or "
+                    f"`hermes gateway restart` to reset."
+                )
+            # action == "resume"
+            if platform not in failed:
+                return (
+                    f"{platform.value} is not in the retry queue — "
+                    f"nothing to resume."
+                )
+            if not failed[platform].get("paused"):
+                return (
+                    f"{platform.value} is already retrying — "
+                    f"no resume needed."
+                )
+            self._resume_paused_platform(platform)
+            return f"✓ {platform.value} resumed — retrying on next watcher tick."
+
+        return (
+            "Usage: /platform <list|pause|resume> [name]\n"
+            "  /platform list — show platform status\n"
+            "  /platform pause <name> — stop retrying a failing platform\n"
+            "  /platform resume <name> — re-queue a paused platform"
+        )
+
     async def _handle_restart_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
         """Handle /restart command - drain active work, then restart the gateway."""
         # Defensive idempotency check: if the previous gateway process
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index b3556d3932d..83d86c4a3a9 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -198,6 +198,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
                args_hint="[days]"),
     CommandDef("platforms", "Show gateway/messaging platform status", "Info",
                cli_only=True, aliases=("gateway",)),
+    CommandDef("platform", "Pause, resume, or list a failing gateway platform", "Info",
+               gateway_only=True, args_hint="<pause|resume|list> [name]"),
     CommandDef("copy", "Copy the last assistant response to clipboard", "Info",
                cli_only=True, args_hint="[number]"),
     CommandDef("paste", "Attach clipboard image from your clipboard", "Info",
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index c2c8a6880d2..7eedc3fd322 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1522,14 +1522,18 @@ def cmd_whatsapp(args):
         )
         print(f"\n✓ Mode: {mode_label}")
 
-    # ── Step 2: Enable WhatsApp ──────────────────────────────────────────
+    # ── Step 2: Mode is selected, will enable WhatsApp only after pairing ──
+    # We intentionally don't write WHATSAPP_ENABLED=true here.  If the user
+    # aborts the wizard later (Ctrl+C, failed npm install, missed QR scan),
+    # we'd otherwise leave .env claiming WhatsApp is ready when the bridge
+    # has no creds.json.  Every subsequent `hermes gateway` then paid a 30s
+    # bridge-bootstrap timeout and queued WhatsApp for indefinite retries.
+    # Now: aborted setup leaves WHATSAPP_ENABLED unset → gateway skips it.
+    # Re-runs that already have WHATSAPP_ENABLED=true (from a prior
+    # successful pairing) stay enabled — we just don't write it pre-emptively.
     print()
-    current = get_env_value("WHATSAPP_ENABLED")
-    if current and current.lower() == "true":
+    if (get_env_value("WHATSAPP_ENABLED") or "").lower() == "true":
         print("✓ WhatsApp is already enabled")
-    else:
-        save_env_value("WHATSAPP_ENABLED", "true")
-        print("✓ WhatsApp enabled")
 
     # ── Step 3: Allowed users ────────────────────────────────────────────
     current_users = get_env_value("WHATSAPP_ALLOWED_USERS") or ""
@@ -1619,6 +1623,12 @@ def cmd_whatsapp(args):
             session_dir.mkdir(parents=True, exist_ok=True)
             print("  ✓ Session cleared")
         else:
+            # Existing pairing — ensure WHATSAPP_ENABLED reflects that.
+            # (Older installs may have lost the env var; covers re-runs
+            # where the user picked "no, keep my session" but the var
+            # was never set or got removed.)
+            if (get_env_value("WHATSAPP_ENABLED") or "").lower() != "true":
+                save_env_value("WHATSAPP_ENABLED", "true")
             print("\n✓ WhatsApp is configured and paired!")
             print("  Start the gateway with: hermes gateway")
             return
@@ -1647,6 +1657,11 @@ def cmd_whatsapp(args):
     # ── Step 7: Post-pairing ─────────────────────────────────────────────
     print()
     if (session_dir / "creds.json").exists():
+        # Only enable WhatsApp now that pairing actually succeeded.  If the
+        # user Ctrl+C'd at any earlier step, WHATSAPP_ENABLED stays unset
+        # and `hermes gateway` skips it cleanly instead of paying a 30s
+        # bridge timeout + queueing the platform for indefinite retries.
+        save_env_value("WHATSAPP_ENABLED", "true")
         print("✓ WhatsApp paired successfully!")
         print()
         if wa_mode == "bot":
diff --git a/tests/gateway/test_platform_reconnect.py b/tests/gateway/test_platform_reconnect.py
index a0bd7ab9eec..e4362a02562 100644
--- a/tests/gateway/test_platform_reconnect.py
+++ b/tests/gateway/test_platform_reconnect.py
@@ -294,15 +294,63 @@ class TestPlatformReconnectWatcher:
         assert runner._failed_platforms[Platform.TELEGRAM]["attempts"] == 2
 
     @pytest.mark.asyncio
-    async def test_reconnect_gives_up_after_max_attempts(self):
-        """After max attempts, platform should be removed from retry queue."""
+    async def test_reconnect_pauses_after_circuit_breaker_threshold(self):
+        """After enough consecutive retryable failures, the watcher should
+        *pause* the platform (keep it in the queue but stop hammering it),
+        not drop it. The user resumes via /platform resume.
+        """
+        runner = _make_runner()
+
+        platform_config = PlatformConfig(enabled=True, token="test")
+        # 9 prior attempts — the next failure will be the 10th and should
+        # trip the circuit breaker.
+        runner._failed_platforms[Platform.TELEGRAM] = {
+            "config": platform_config,
+            "attempts": 9,
+            "next_retry": time.monotonic() - 1,
+        }
+
+        fail_adapter = StubAdapter(
+            succeed=False, fatal_error="DNS failure", fatal_retryable=True
+        )
+        real_sleep = asyncio.sleep
+
+        with patch.object(runner, "_create_adapter", return_value=fail_adapter):
+            async def run_one_iteration():
+                runner._running = True
+                call_count = 0
+
+                async def fake_sleep(n):
+                    nonlocal call_count
+                    call_count += 1
+                    if call_count > 1:
+                        runner._running = False
+                    await real_sleep(0)
+
+                with patch("asyncio.sleep", side_effect=fake_sleep):
+                    await runner._platform_reconnect_watcher()
+
+            await run_one_iteration()
+
+        # Platform stays in queue — paused, not dropped
+        assert Platform.TELEGRAM in runner._failed_platforms
+        info = runner._failed_platforms[Platform.TELEGRAM]
+        assert info["paused"] is True
+        assert info["attempts"] == 10
+        assert "pause_reason" in info
+
+    @pytest.mark.asyncio
+    async def test_reconnect_skips_paused_platforms(self):
+        """A paused platform should not be retried by the watcher tick."""
         runner = _make_runner()
 
         platform_config = PlatformConfig(enabled=True, token="test")
         runner._failed_platforms[Platform.TELEGRAM] = {
             "config": platform_config,
-            "attempts": 20,  # At max
-            "next_retry": time.monotonic() - 1,
+            "attempts": 10,
+            "next_retry": time.monotonic() - 1,  # would normally retry now
+            "paused": True,
+            "pause_reason": "paused via /platform pause",
         }
 
         real_sleep = asyncio.sleep
@@ -324,8 +372,10 @@ class TestPlatformReconnectWatcher:
 
             await run_one_iteration()
 
-        assert Platform.TELEGRAM not in runner._failed_platforms
-        mock_create.assert_not_called()  # Should give up without trying
+        # Paused platform stays queued and was never touched
+        assert Platform.TELEGRAM in runner._failed_platforms
+        assert runner._failed_platforms[Platform.TELEGRAM]["paused"] is True
+        mock_create.assert_not_called()
 
     @pytest.mark.asyncio
     async def test_reconnect_skips_when_not_time_yet(self):
@@ -459,11 +509,12 @@ class TestRuntimeDisconnectQueuing:
         assert Platform.TELEGRAM not in runner._failed_platforms
 
     @pytest.mark.asyncio
-    async def test_retryable_error_exits_for_service_restart_when_all_down(self):
-        """Gateway should exit with failure when all platforms fail with retryable errors.
-
-        This lets systemd Restart=on-failure restart the process, which is more
-        reliable than in-process background reconnection after exhausted retries.
+    async def test_retryable_error_keeps_gateway_alive_when_all_down(self):
+        """When all adapters fail at runtime with retryable errors, the
+        gateway should stay alive and let the reconnect watcher recover them
+        in the background.  (Previously this exited-with-failure to trigger
+        a systemd restart — that converted transient outages into infinite
+        restart loops and killed in-process state.)
         """
         runner = _make_runner()
         runner.stop = AsyncMock()
@@ -474,9 +525,9 @@ class TestRuntimeDisconnectQueuing:
 
         await runner._handle_adapter_fatal_error(adapter)
 
-        # stop() SHOULD be called — gateway exits for systemd restart
-        runner.stop.assert_called_once()
-        assert runner._exit_with_failure is True
+        # stop() should NOT be called — gateway stays alive for the watcher
+        runner.stop.assert_not_called()
+        assert runner._exit_with_failure is False
         assert Platform.TELEGRAM in runner._failed_platforms
 
     @pytest.mark.asyncio
@@ -512,3 +563,154 @@ class TestRuntimeDisconnectQueuing:
         await runner._handle_adapter_fatal_error(adapter)
 
         runner.stop.assert_called_once()
+
+
+# --- Pause / resume circuit breaker ---
+
+
+class TestPauseResume:
+    """Test the per-platform pause/resume helpers and slash command."""
+
+    def test_pause_marks_platform_paused(self):
+        runner = _make_runner()
+        runner._failed_platforms[Platform.TELEGRAM] = {
+            "config": PlatformConfig(enabled=True, token="t"),
+            "attempts": 3,
+            "next_retry": time.monotonic() + 30,
+        }
+        runner._pause_failed_platform(Platform.TELEGRAM, reason="manual")
+        info = runner._failed_platforms[Platform.TELEGRAM]
+        assert info["paused"] is True
+        assert info["pause_reason"] == "manual"
+        assert info["next_retry"] == float("inf")
+
+    def test_pause_is_idempotent(self):
+        runner = _make_runner()
+        runner._failed_platforms[Platform.TELEGRAM] = {
+            "config": PlatformConfig(enabled=True, token="t"),
+            "attempts": 3,
+            "next_retry": time.monotonic() + 30,
+            "paused": True,
+            "pause_reason": "first reason",
+        }
+        runner._pause_failed_platform(Platform.TELEGRAM, reason="second reason")
+        # Reason should not be overwritten on a second pause call.
+        assert (
+            runner._failed_platforms[Platform.TELEGRAM]["pause_reason"]
+            == "first reason"
+        )
+
+    def test_pause_no_op_when_platform_not_queued(self):
+        runner = _make_runner()
+        # No exception even when the platform isn't in _failed_platforms.
+        runner._pause_failed_platform(Platform.TELEGRAM, reason="x")
+        assert Platform.TELEGRAM not in runner._failed_platforms
+
+    def test_resume_clears_paused_and_resets_attempts(self):
+        runner = _make_runner()
+        runner._failed_platforms[Platform.TELEGRAM] = {
+            "config": PlatformConfig(enabled=True, token="t"),
+            "attempts": 10,
+            "next_retry": float("inf"),
+            "paused": True,
+            "pause_reason": "auto-paused",
+        }
+        assert runner._resume_paused_platform(Platform.TELEGRAM) is True
+        info = runner._failed_platforms[Platform.TELEGRAM]
+        assert info["paused"] is False
+        assert info["attempts"] == 0
+        assert info["next_retry"] != float("inf")
+        assert "pause_reason" not in info
+
+    def test_resume_returns_false_when_not_paused(self):
+        runner = _make_runner()
+        runner._failed_platforms[Platform.TELEGRAM] = {
+            "config": PlatformConfig(enabled=True, token="t"),
+            "attempts": 1,
+            "next_retry": time.monotonic() + 30,
+        }
+        assert runner._resume_paused_platform(Platform.TELEGRAM) is False
+
+    def test_resume_returns_false_when_not_queued(self):
+        runner = _make_runner()
+        assert runner._resume_paused_platform(Platform.TELEGRAM) is False
+
+
+class TestPlatformSlashCommand:
+    """Test the /platform list|pause|resume slash command handler."""
+
+    def _make_event(self, content: str):
+        ev = MagicMock()
+        ev.content = content
+        return ev
+
+    @pytest.mark.asyncio
+    async def test_list_shows_connected_and_paused(self):
+        runner = _make_runner()
+        runner.adapters[Platform.DISCORD] = StubAdapter(platform=Platform.DISCORD)
+        runner._failed_platforms[Platform.WHATSAPP] = {
+            "config": PlatformConfig(enabled=True, token="t"),
+            "attempts": 10,
+            "next_retry": float("inf"),
+            "paused": True,
+            "pause_reason": "not paired",
+        }
+        out = await runner._handle_platform_command(self._make_event("/platform list"))
+        assert "discord" in out
+        assert "whatsapp" in out
+        assert "PAUSED" in out
+        assert "not paired" in out
+
+    @pytest.mark.asyncio
+    async def test_pause_command_pauses_queued_platform(self):
+        runner = _make_runner()
+        runner._failed_platforms[Platform.WHATSAPP] = {
+            "config": PlatformConfig(enabled=True, token="t"),
+            "attempts": 2,
+            "next_retry": time.monotonic() + 30,
+        }
+        out = await runner._handle_platform_command(
+            self._make_event("/platform pause whatsapp")
+        )
+        assert "paused" in out.lower()
+        assert runner._failed_platforms[Platform.WHATSAPP]["paused"] is True
+
+    @pytest.mark.asyncio
+    async def test_pause_rejects_unqueued_platform(self):
+        runner = _make_runner()
+        out = await runner._handle_platform_command(
+            self._make_event("/platform pause whatsapp")
+        )
+        assert "not in the retry queue" in out
+
+    @pytest.mark.asyncio
+    async def test_resume_command_resumes_paused_platform(self):
+        runner = _make_runner()
+        runner._failed_platforms[Platform.WHATSAPP] = {
+            "config": PlatformConfig(enabled=True, token="t"),
+            "attempts": 10,
+            "next_retry": float("inf"),
+            "paused": True,
+            "pause_reason": "x",
+        }
+        out = await runner._handle_platform_command(
+            self._make_event("/platform resume whatsapp")
+        )
+        assert "resumed" in out.lower()
+        assert runner._failed_platforms[Platform.WHATSAPP]["paused"] is False
+
+    @pytest.mark.asyncio
+    async def test_unknown_platform_name(self):
+        runner = _make_runner()
+        out = await runner._handle_platform_command(
+            self._make_event("/platform pause notarealplatform")
+        )
+        assert "Unknown platform" in out
+
+    @pytest.mark.asyncio
+    async def test_bare_platform_shows_usage_with_list(self):
+        # An empty /platform call defaults to "list".
+        runner = _make_runner()
+        out = await runner._handle_platform_command(self._make_event("/platform"))
+        assert "Gateway platforms" in out
+
diff --git a/tests/gateway/test_runner_fatal_adapter.py b/tests/gateway/test_runner_fatal_adapter.py
index 13b9a7d99e8..706514f1ae6 100644
--- a/tests/gateway/test_runner_fatal_adapter.py
+++ b/tests/gateway/test_runner_fatal_adapter.py
@@ -68,7 +68,11 @@ async def test_runner_requests_clean_exit_for_nonretryable_startup_conflict(monk
 @pytest.mark.asyncio
 async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatch, tmp_path):
     """Retryable runtime fatal errors queue the platform for reconnection
-    instead of shutting down the gateway."""
+    AND keep the gateway alive — the background reconnect watcher recovers
+    the platform when the underlying issue clears.  (Previously this
+    exited-with-failure to trigger a systemd restart; that converted
+    transient failures into infinite restart loops.)
+    """
     config = GatewayConfig(
         platforms={
             Platform.WHATSAPP: PlatformConfig(enabled=True, token="token")
@@ -89,8 +93,8 @@ async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatc
 
     await runner._handle_adapter_fatal_error(adapter)
 
-    # Should shut down with failure — systemd Restart=on-failure will restart
-    runner.stop.assert_awaited_once()
-    assert runner._exit_with_failure is True
+    # Gateway stays alive — watcher will retry in background
+    runner.stop.assert_not_awaited()
+    assert runner._exit_with_failure is False
     assert Platform.WHATSAPP in runner._failed_platforms
     assert runner._failed_platforms[Platform.WHATSAPP]["attempts"] == 0
diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py
index fc5c775a779..438553f34ed 100644
--- a/tests/gateway/test_runner_startup_failures.py
+++ b/tests/gateway/test_runner_startup_failures.py
@@ -64,7 +64,14 @@ class _SuccessfulAdapter(BasePlatformAdapter):
 
 
 @pytest.mark.asyncio
-async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path):
+async def test_runner_stays_alive_for_retryable_startup_errors(monkeypatch, tmp_path):
+    """Retryable startup errors should leave the gateway running in
+    degraded mode so the reconnect watcher can recover the platform when
+    the underlying problem clears.  Previously this returned False from
+    ``start()`` and exited the process, which converted a single broken
+    platform (e.g. unpaired WhatsApp, DNS blip on Telegram) into a
+    systemd restart loop and killed cron jobs in the meantime.
+    """
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     config = GatewayConfig(
         platforms={
@@ -78,11 +85,13 @@ async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch,
 
     ok = await runner.start()
 
-    assert ok is False
+    # Gateway stays alive in degraded mode; reconnect watcher takes over.
+    assert ok is True
     assert runner.should_exit_cleanly is False
     state = read_runtime_status()
-    assert state["gateway_state"] == "startup_failed"
-    assert "temporary DNS resolution failure" in state["exit_reason"]
+    assert state["gateway_state"] in {"degraded", "running"}
+    # Telegram was queued for retry, not given up on.
+    assert Platform.TELEGRAM in runner._failed_platforms
     assert state["platforms"]["telegram"]["state"] == "retrying"
     assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error"
 
diff --git a/tests/gateway/test_whatsapp_connect.py b/tests/gateway/test_whatsapp_connect.py
index 0a359fb7511..9d7807734bb 100644
--- a/tests/gateway/test_whatsapp_connect.py
+++ b/tests/gateway/test_whatsapp_connect.py
@@ -611,3 +611,93 @@ class TestHttpSessionLifecycle:
 
         mock_task.cancel.assert_not_called()
         assert adapter._poll_task is None
+
+
+# ---------------------------------------------------------------------------
+# Pre-flight: refuse to start the bridge when creds.json is missing
+# ---------------------------------------------------------------------------
+
+
+class TestNoCredsPreflight:
+    """Verify ``connect()`` fast-fails as non-retryable when WhatsApp is
+    enabled but the user never finished pairing (no ``creds.json``).
+
+    Without this guard, every gateway boot:
+      • spawned the bridge subprocess (npm install if needed)
+      • waited 30s for status:connected (never happens without creds)
+      • queued WhatsApp for indefinite retries that would just repeat
+    With the guard, ``connect()`` returns False immediately with a
+    non-retryable fatal error so the reconnect watcher drops the platform
+    and the gateway gets a single clear log line telling the user to run
+    ``hermes whatsapp``.
+    """
+
+    @pytest.mark.asyncio
+    async def test_connect_returns_false_when_no_creds(self, tmp_path):
+        from gateway.platforms.whatsapp import WhatsAppAdapter
+
+        adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
+        adapter.platform = Platform.WHATSAPP
+        adapter.config = MagicMock()
+        adapter._bridge_port = 19876
+        # Point bridge_script at a real existing file so the earlier
+        # bridge-missing check doesn't trip — we want to exercise the
+        # creds.json check specifically.
+        bridge = tmp_path / "bridge.js"
+        bridge.write_text("// stub")
+        adapter._bridge_script = str(bridge)
+        adapter._session_path = tmp_path / "session"  # no creds.json inside
+        adapter._session_path.mkdir()
+        adapter._bridge_log_fh = None
+        adapter._fatal_error_code = None
+        adapter._fatal_error_message = None
+        adapter._fatal_error_retryable = True
+
+        with patch(
+            "gateway.platforms.whatsapp.check_whatsapp_requirements",
+            return_value=True,
+        ):
+            result = await adapter.connect()
+
+        assert result is False
+        # Non-retryable so the reconnect watcher drops it cleanly
+        assert adapter._fatal_error_code == "whatsapp_not_paired"
+        assert adapter._fatal_error_retryable is False
+
+    @pytest.mark.asyncio
+    async def test_connect_proceeds_when_creds_present(self, tmp_path):
+        """When creds.json exists, the preflight check is bypassed and
+        connect() proceeds to the bridge bootstrap path. We don't fully
+        simulate the bridge here — we just verify no fast-fail occurs.
+        """
+        from gateway.platforms.whatsapp import WhatsAppAdapter
+
+        adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
+        adapter.platform = Platform.WHATSAPP
+        adapter.config = MagicMock()
+        adapter._bridge_port = 19877
+        bridge = tmp_path / "bridge.js"
+        bridge.write_text("// stub")
+        adapter._bridge_script = str(bridge)
+        session_dir = tmp_path / "session"
+        session_dir.mkdir()
+        (session_dir / "creds.json").write_text("{}")
+        adapter._session_path = session_dir
+        adapter._bridge_log_fh = None
+        adapter._fatal_error_code = None
+        adapter._fatal_error_message = None
+        adapter._fatal_error_retryable = True
+        # Stub _acquire_platform_lock to return False so connect() exits
+        # cleanly *after* the preflight, without spawning subprocesses.
+        adapter._acquire_platform_lock = MagicMock(return_value=False)
+
+        with patch(
+            "gateway.platforms.whatsapp.check_whatsapp_requirements",
+            return_value=True,
+        ):
+            result = await adapter.connect()
+
+        # Preflight passed — exits because we faked lock acquisition,
+        # but the fatal-error code is NOT the "not paired" one.
+        assert result is False
+        assert adapter._fatal_error_code != "whatsapp_not_paired"
diff --git a/tests/hermes_cli/test_whatsapp_setup_ordering.py b/tests/hermes_cli/test_whatsapp_setup_ordering.py
new file mode 100644
index 00000000000..47952bcc796
--- /dev/null
+++ b/tests/hermes_cli/test_whatsapp_setup_ordering.py
@@ -0,0 +1,140 @@
+"""Regression tests for ``cmd_whatsapp`` env-var write ordering.
+
+Before the fix, ``hermes whatsapp`` wrote ``WHATSAPP_ENABLED=true`` at
+step 2 — before npm install (step 4) and before QR pairing (step 6).
+If the user Ctrl+C'd at any later step, ``.env`` claimed WhatsApp was
+ready when the bridge still had no ``creds.json``.  Every subsequent
+``hermes gateway`` then paid a 30s bridge-bootstrap timeout and queued
+WhatsApp for indefinite retries — looking like "the gateway is broken."
+
+The fix: only set ``WHATSAPP_ENABLED=true`` once pairing actually
+succeeds (creds.json exists).  Aborted setup leaves no enabled state.
+"""
+
+from __future__ import annotations
+
+import io
+import os
+from contextlib import redirect_stdout
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture
+def isolated_home(tmp_path, monkeypatch):
+    home = tmp_path / "home"
+    hermes = home / ".hermes"
+    hermes.mkdir(parents=True)
+    monkeypatch.setattr(Path, "home", lambda: home)
+    monkeypatch.setenv("HERMES_HOME", str(hermes))
+    # Ensure get_env_value cache doesn't carry stale state.
+    for key in list(os.environ):
+        if key.startswith("WHATSAPP_"):
+            monkeypatch.delenv(key, raising=False)
+    return hermes
+
+
+def _env_value(hermes_home: Path, key: str) -> str | None:
+    env_file = hermes_home / ".env"
+    if not env_file.exists():
+        return None
+    for line in env_file.read_text().splitlines():
+        if "=" not in line:
+            continue
+        k, _, v = line.partition("=")
+        if k.strip() == key:
+            return v.strip().strip('"').strip("'")
+    return None
+
+
+def test_aborted_setup_does_not_enable_whatsapp(isolated_home, monkeypatch):
+    """User picks mode 1, then Ctrl+C's at the allowed-users prompt.
+
+    WHATSAPP_ENABLED must NOT be present in .env after abort.
+    """
+    from hermes_cli.main import cmd_whatsapp
+
+    # First input() = mode choice, second input() = allowed-users prompt
+    # We raise KeyboardInterrupt on the second call to simulate abort.
+    inputs = iter(["1"])
+
+    def fake_input(_prompt=""):
+        try:
+            return next(inputs)
+        except StopIteration:
+            raise KeyboardInterrupt
+
+    monkeypatch.setattr("builtins.input", fake_input)
+    # _require_tty calls sys.stdin.isatty — make it pass.
+    monkeypatch.setattr("hermes_cli.main._require_tty", lambda *_a, **_kw: None)
+    # No node, no bridge script — we shouldn't reach those steps anyway.
+
+    buf = io.StringIO()
+    with redirect_stdout(buf):
+        try:
+            cmd_whatsapp(MagicMock())
+        except KeyboardInterrupt:
+            pass
+
+    assert _env_value(isolated_home, "WHATSAPP_ENABLED") is None, (
+        "Setup aborted before pairing — WHATSAPP_ENABLED must not be set. "
+        f"Got .env: {(isolated_home / '.env').read_text() if (isolated_home / '.env').exists() else '(missing)'}"
+    )
+
+
+def test_existing_pairing_skip_branch_enables_whatsapp(isolated_home, monkeypatch):
+    """User runs ``hermes whatsapp`` with an existing paired session and
+    chooses "no, keep my session" at the re-pair prompt.  The env var
+    should be (re-)written to true so the gateway picks WhatsApp back up,
+    even if the var was lost since the original pairing.
+    """
+    from hermes_cli.main import cmd_whatsapp
+
+    # Pre-create a paired session WITHOUT WHATSAPP_ENABLED in .env.
+    session = isolated_home / "whatsapp" / "session"
+    session.mkdir(parents=True)
+    (session / "creds.json").write_text("{}")
+    monkeypatch.setenv("WHATSAPP_MODE", "bot")
+    monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15551234567")
+
+    # mode already set → skip mode prompt; users already set → skip update
+    # prompt with "no"; pairing exists → "no, keep session" → return.
+    inputs = iter(["n", "n"])
+
+    def fake_input(_prompt=""):
+        try:
+            return next(inputs)
+        except StopIteration:
+            return "n"
+
+    monkeypatch.setattr("builtins.input", fake_input)
+    monkeypatch.setattr("hermes_cli.main._require_tty", lambda *_a, **_kw: None)
+    # Skip the bridge npm install — we're testing setup-ordering, not bridge
+    # bootstrapping.  Pretend node_modules exists (Path.exists -> True for that
+    # specific check is hard to scope, so instead pretend npm install would
+    # succeed silently if reached).
+    monkeypatch.setattr(
+        "subprocess.run",
+        lambda *_a, **_kw: MagicMock(returncode=0, stderr=""),
+    )
+    monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/npm")
+    # Patch (bridge_dir / "node_modules").exists() by stubbing Path.exists
+    # to True for that one specific subpath.  Easier: pre-create it as a
+    # symlink to /tmp.  But we can't write to the repo.  Instead, stub
+    # Path.exists wholesale to True for node_modules; the creds.json check
+    # in the same function still works because we wrote it ourselves.
+    _orig_exists = Path.exists
+    def _stub_exists(self):
+        if self.name == "node_modules":
+            return True
+        return _orig_exists(self)
+    monkeypatch.setattr(Path, "exists", _stub_exists)
+
+    buf = io.StringIO()
+    with redirect_stdout(buf):
+        cmd_whatsapp(MagicMock())
+
+    # The skip-rebar branch should have set the env var on its way out.
+    assert _env_value(isolated_home, "WHATSAPP_ENABLED") == "true"

From 032fb842225dedf5e6649489f81631465f1aa809 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:44:27 -0700
Subject: [PATCH 062/218] docs(hermes_tools_mcp_server): align scope docstring
 with EXPOSED_TOOLS (#26603)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The top-of-file scope docstring listed delegate_task, memory, and
session_search as exposed tools, but EXPOSED_TOOLS deliberately omits
them (they're _AGENT_LOOP_TOOLS and require the running AIAgent context
to dispatch — the inline comment block already explains this). Kanban
tools, which ARE exposed, were missing from the docstring entirely.

Rewrite the Scope / DO NOT expose sections to match the actual tuple:
drop delegate_task/memory/session_search from 'expose', add the
kanban_* family, move delegate_task/memory/session_search/todo into
'DO NOT expose' with the agent-loop rationale.

Fixes #26567 (doc-only fix; option 2 — shimming memory/session_search
through MemoryStore/SessionDB directly — left for a follow-up issue
once the plugin-memory locking story is audited).
---
 agent/transports/hermes_tools_mcp_server.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/agent/transports/hermes_tools_mcp_server.py b/agent/transports/hermes_tools_mcp_server.py
index f7f8ae24887..37f2d6179d1 100644
--- a/agent/transports/hermes_tools_mcp_server.py
+++ b/agent/transports/hermes_tools_mcp_server.py
@@ -14,20 +14,28 @@ the user gets full Hermes capability inside a Codex turn.
 Scope (what we expose):
   - web_search, web_extract              — Firecrawl, no codex equivalent
   - browser_navigate / _click / _type /  — Camofox/Browserbase automation
-    _snapshot / _screenshot / _scroll / _back / _press / _vision
-  - delegate_task                        — Hermes subagents
+    _snapshot / _scroll / _back / _press /
+    _get_images / _console / _vision
   - vision_analyze                       — image inspection by vision model
   - image_generate                       — image generation
-  - memory                               — Hermes' persistent memory store
   - skill_view, skills_list              — Hermes' skill library
-  - session_search                       — cross-session search
   - text_to_speech                       — TTS
+  - kanban_* (complete/block/comment/    — kanban worker + orchestrator
+    heartbeat/show/list/create/            handoff (stateless: read env var,
+    unblock/link)                          write ~/.hermes/kanban.db)
 
-What we DO NOT expose (codex has equivalents):
+What we DO NOT expose:
   - terminal / shell                     — codex's own shell tool
   - read_file / write_file / patch       — codex's apply_patch + shell
   - search_files / process               — codex's shell
-  - clarify, todo                        — codex's own UX
+  - clarify                              — codex's own UX
+  - delegate_task / memory /             — `_AGENT_LOOP_TOOLS` in Hermes
+    session_search / todo                  (model_tools.py). They require
+                                           the running AIAgent context to
+                                           dispatch (mid-loop state), so a
+                                           stateless MCP callback can't
+                                           drive them. See the inline
+                                           comment on EXPOSED_TOOLS below.
 
 Run with: python -m agent.transports.hermes_tools_mcp_server
 Spawned by: CodexAppServerSession.ensure_started() when the runtime is

From 3215ef160938c71ff61bab279b30545c0cc14a14 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 11:59:22 +0000
Subject: [PATCH 063/218] ci(pypi): build web dashboard + TUI bundle before
 creating wheel

---
 .github/workflows/upload_to_pypi.yml | 21 +++++++++++++++++++++
 pyproject.toml                       |  2 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml
index 4e2fe4748d3..ae68ed034a1 100644
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -50,6 +50,27 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e  # v6
 
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      - name: Build web dashboard
+        run: cd web && npm ci && npm run build
+
+      - name: Build TUI bundle
+        run: cd ui-tui && npm ci && npm run build
+
+      - name: Bundle TUI into hermes_cli
+        run: |
+          mkdir -p hermes_cli/tui_dist
+          cp ui-tui/dist/entry.js hermes_cli/tui_dist/entry.js
+
+      - name: Verify frontend assets exist
+        run: |
+          test -f hermes_cli/web_dist/index.html || { echo "ERROR: web_dist not built"; exit 1; }
+          test -f hermes_cli/tui_dist/entry.js || { echo "ERROR: tui_dist not built"; exit 1; }
+
       - name: Build wheel and sdist
         run: uv build --sdist --wheel
 
diff --git a/pyproject.toml b/pyproject.toml
index ae2fff385a3..87674601db0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -210,7 +210,7 @@ hermes-acp = "acp_adapter.entry:main"
 py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
 
 [tool.setuptools.package-data]
-hermes_cli = ["web_dist/**/*"]
+hermes_cli = ["web_dist/**/*", "tui_dist/**/*"]
 gateway = ["assets/**/*"]
 acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"]
 

From 384ec9684e86081c4add84d671d2bbf7c8ee69d4 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:00:07 +0000
Subject: [PATCH 064/218] feat(banner): check PyPI for updates when not a git
 install

For pip-installed hermes-agent (no .git directory), fall back to
querying PyPI's JSON API to compare __version__ against the latest
published release, using stdlib only (urllib + json, no packaging dep).
---
 hermes_cli/banner.py                       | 48 +++++++++++++++++++++-
 tests/hermes_cli/test_banner_pip_update.py | 35 ++++++++++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 tests/hermes_cli/test_banner_pip_update.py

diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index 036412ac072..061992b4746 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -175,6 +175,49 @@ def _check_via_local_git(repo_dir: Path) -> Optional[int]:
     return None
 
 
+def _version_tuple(v: str) -> tuple[int, ...]:
+    """Parse '0.13.0' into (0, 13, 0) for comparison. Non-numeric segments become 0."""
+    parts = []
+    for segment in v.split("."):
+        try:
+            parts.append(int(segment))
+        except ValueError:
+            parts.append(0)
+    return tuple(parts)
+
+
+def _fetch_pypi_latest(package: str = "hermes-agent") -> Optional[str]:
+    """Fetch the latest version of a package from PyPI. Returns None on failure."""
+    try:
+        import urllib.request
+        import json as _json
+        url = f"https://pypi.org/pypi/{package}/json"
+        req = urllib.request.Request(url, headers={"Accept": "application/json"})
+        with urllib.request.urlopen(req, timeout=5) as resp:
+            data = _json.loads(resp.read())
+            return data.get("info", {}).get("version")
+    except Exception:
+        return None
+
+
+def _check_via_pypi() -> Optional[int]:
+    """Compare installed version against PyPI latest.
+
+    Returns 0 if up-to-date, 1 if behind, None on failure.
+    """
+    latest = _fetch_pypi_latest()
+    if latest is None:
+        return None
+    if latest == VERSION:
+        return 0
+    try:
+        if _version_tuple(latest) > _version_tuple(VERSION):
+            return 1
+        return 0
+    except Exception:
+        return 1 if latest != VERSION else 0
+
+
 def check_for_updates() -> Optional[int]:
     """Check whether a Hermes update is available.
 
@@ -213,8 +256,9 @@ def check_for_updates() -> Optional[int]:
         if not (repo_dir / ".git").exists():
             repo_dir = hermes_home / "hermes-agent"
         if not (repo_dir / ".git").exists():
-            return None
-        behind = _check_via_local_git(repo_dir)
+            behind = _check_via_pypi()
+        else:
+            behind = _check_via_local_git(repo_dir)
 
     try:
         cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev}))
diff --git a/tests/hermes_cli/test_banner_pip_update.py b/tests/hermes_cli/test_banner_pip_update.py
new file mode 100644
index 00000000000..a0e9266f698
--- /dev/null
+++ b/tests/hermes_cli/test_banner_pip_update.py
@@ -0,0 +1,35 @@
+from unittest.mock import patch
+
+
+def test_check_via_pypi_detects_update():
+    """_check_via_pypi returns 1 when PyPI has newer version."""
+    from hermes_cli.banner import _check_via_pypi
+    with patch("hermes_cli.banner.VERSION", "0.12.0"):
+        with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"):
+            result = _check_via_pypi()
+            assert result == 1
+
+
+def test_check_via_pypi_up_to_date():
+    """_check_via_pypi returns 0 when versions match."""
+    from hermes_cli.banner import _check_via_pypi
+    with patch("hermes_cli.banner.VERSION", "0.13.0"):
+        with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"):
+            result = _check_via_pypi()
+            assert result == 0
+
+
+def test_check_via_pypi_network_failure():
+    """_check_via_pypi returns None on network error."""
+    from hermes_cli.banner import _check_via_pypi
+    with patch("hermes_cli.banner._fetch_pypi_latest", return_value=None):
+        result = _check_via_pypi()
+        assert result is None
+
+
+def test_version_tuple_comparison():
+    """Version comparison works with multi-segment versions."""
+    from hermes_cli.banner import _version_tuple
+    assert _version_tuple("0.13.0") > _version_tuple("0.12.0")
+    assert _version_tuple("0.13.0") == _version_tuple("0.13.0")
+    assert _version_tuple("1.0.0") > _version_tuple("0.99.99")

From cc07e30f45267c00fac97ea5569c606aca5a1ffb Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:00:28 +0000
Subject: [PATCH 065/218] feat(install): add --ensure and --postinstall modes
 for targeted dep bootstrap

Adds --ensure DEPS for pip-runtime dep installation and --postinstall
for pip users who want the full post-install experience without cloning.
---
 scripts/install.sh | 106 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 1 deletion(-)

diff --git a/scripts/install.sh b/scripts/install.sh
index 9c5db6b1c08..9b1b7469bb8 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -71,6 +71,8 @@ USE_VENV=true
 RUN_SETUP=true
 SKIP_BROWSER=false
 BRANCH="main"
+ENSURE_DEPS=""
+POSTINSTALL_MODE=false
 
 # Detect non-interactive mode (e.g. curl | bash)
 # When stdin is not a terminal, read -p will fail with EOF,
@@ -109,6 +111,14 @@ while [[ $# -gt 0 ]]; do
             HERMES_HOME="$2"
             shift 2
             ;;
+        --ensure)
+            ENSURE_DEPS="$2"
+            shift 2
+            ;;
+        --postinstall)
+            POSTINSTALL_MODE=true
+            shift
+            ;;
         -h|--help)
             echo "Hermes Agent Installer"
             echo ""
@@ -133,6 +143,12 @@ while [[ $# -gt 0 ]]; do
             echo "  (default /root/.hermes).  This keeps Docker bind-mounted volumes"
             echo "  small and ensures the command is on PATH for all shells."
             echo "  Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place."
+            echo "  --ensure DEPS  Install only specified deps (comma-separated)"
+            echo "                   Supported: node, browser, ripgrep, ffmpeg"
+            echo "                   Does NOT clone repo or create venv"
+            echo "  --postinstall  Run post-install setup only (for pip users)"
+            echo "                   Installs optional deps + runs hermes setup"
+            echo "                   Does NOT clone repo or create venv"
             exit 0
             ;;
         *)
@@ -1872,6 +1888,88 @@ print_success() {
     fi
 }
 
+ensure_mode() {
+    detect_os
+
+    IFS=',' read -ra DEPS <<< "$ENSURE_DEPS"
+    for dep in "${DEPS[@]}"; do
+        dep="$(echo "$dep" | tr -d '[:space:]')"
+        case "$dep" in
+            node)
+                check_node
+                ;;
+            browser)
+                check_node
+                if [ "$HAS_NODE" = true ]; then
+                    DETECTED_BROWSER_EXECUTABLE="$(find_system_browser 2>/dev/null || true)"
+                    if [ -z "$DETECTED_BROWSER_EXECUTABLE" ]; then
+                        log_info "Installing agent-browser + Chromium..."
+                        npm_bin="$(command -v npm 2>/dev/null || echo "")"
+                        if [ -n "$npm_bin" ]; then
+                            local agent_browser_dir="$HERMES_HOME/node_modules"
+                            mkdir -p "$agent_browser_dir"
+                            "$npm_bin" install --prefix "$HERMES_HOME" agent-browser 2>/dev/null || true
+                            npx playwright install chromium 2>/dev/null || true
+                        fi
+                    else
+                        log_success "System browser found: $DETECTED_BROWSER_EXECUTABLE"
+                    fi
+                fi
+                ;;
+            ripgrep)
+                if ! command -v rg &>/dev/null; then
+                    HAS_RIPGREP=false
+                    HAS_FFMPEG=true
+                    install_system_packages
+                fi
+                ;;
+            ffmpeg)
+                if ! command -v ffmpeg &>/dev/null; then
+                    HAS_FFMPEG=false
+                    HAS_RIPGREP=true
+                    install_system_packages
+                fi
+                ;;
+            *)
+                log_warn "Unknown dependency: $dep"
+                ;;
+        esac
+    done
+}
+
+postinstall_mode() {
+    print_banner
+    detect_os
+
+    log_info "Post-install mode: setting up Hermes for pip install"
+
+    check_node
+    check_network_prerequisites
+    install_system_packages
+
+    if [ "$HAS_NODE" = true ] && [ "$SKIP_BROWSER" = false ]; then
+        DETECTED_BROWSER_EXECUTABLE="$(find_system_browser 2>/dev/null || true)"
+        if [ -z "$DETECTED_BROWSER_EXECUTABLE" ]; then
+            log_info "Installing browser engine..."
+            npm_bin="$(command -v npm 2>/dev/null || echo "")"
+            if [ -n "$npm_bin" ]; then
+                npx playwright install chromium 2>/dev/null || true
+            fi
+        else
+            log_success "System browser found: $DETECTED_BROWSER_EXECUTABLE"
+        fi
+    fi
+
+    HERMES_CMD="$(command -v hermes 2>/dev/null || echo "")"
+    if [ -n "$HERMES_CMD" ]; then
+        log_info "Running hermes setup..."
+        "$HERMES_CMD" setup
+    else
+        log_warn "hermes command not found on PATH"
+        log_info "Try: python -m hermes_cli.main setup"
+    fi
+}
+
 # ============================================================================
 # Main
 # ============================================================================
@@ -1900,4 +1998,10 @@ main() {
     print_success
 }
 
-main
+if [ -n "$ENSURE_DEPS" ]; then
+    ensure_mode
+elif [ "$POSTINSTALL_MODE" = true ]; then
+    postinstall_mode
+else
+    main
+fi

From c4bda3f27c033f33eef824efc3e689119bfbee72 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:00:54 +0000
Subject: [PATCH 066/218] fix(doctor): generate config from defaults when
 template file is missing

When cli-config.yaml.example is not present (e.g. pip wheel install),
fall back to writing DEFAULT_CONFIG via save_config() instead of
warning and requiring a manual fix.
---
 hermes_cli/doctor.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index c2035b03e6e..bf5a8865909 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -656,15 +656,17 @@ def run_doctor(args):
         if fallback_config.exists():
             check_ok("cli-config.yaml exists (in project directory)")
         else:
-            example_config = PROJECT_ROOT / 'cli-config.yaml.example'
-            if should_fix and example_config.exists():
+            if should_fix:
                 config_path.parent.mkdir(parents=True, exist_ok=True)
-                shutil.copy2(str(example_config), str(config_path))
-                check_ok(f"Created {_DHH}/config.yaml from cli-config.yaml.example")
+                example_config = PROJECT_ROOT / 'cli-config.yaml.example'
+                if example_config.exists():
+                    shutil.copy2(str(example_config), str(config_path))
+                    check_ok(f"Created {_DHH}/config.yaml from cli-config.yaml.example")
+                else:
+                    from hermes_cli.config import DEFAULT_CONFIG, save_config
+                    save_config(DEFAULT_CONFIG)
+                    check_ok(f"Created {_DHH}/config.yaml from defaults")
                 fixed_count += 1
-            elif should_fix:
-                check_warn("config.yaml not found and no example to copy from")
-                manual_issues.append(f"Create {_DHH}/config.yaml manually")
             else:
                 check_warn("config.yaml not found", "(using defaults)")
 

From d69eab1efd96a4622e6b00fbb806d1cd049b3589 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:01:31 +0000
Subject: [PATCH 067/218] fix(gateway): build service PATH from existing dirs
 only, include ~/.hermes/node_modules

Extract PATH building into _build_service_path_dirs() that skips directories
which don't exist on disk (e.g. node_modules/.bin for pip installs) and also
includes ~/.hermes/node/bin and ~/.hermes/node_modules/.bin for agent-browser.
---
 hermes_cli/gateway.py                         | 38 +++++++++++++++----
 .../hermes_cli/test_gateway_service_paths.py  | 31 +++++++++++++++
 2 files changed, 61 insertions(+), 8 deletions(-)
 create mode 100644 tests/hermes_cli/test_gateway_service_paths.py

diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index b0cb579daa8..a865bcaf8be 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -2103,15 +2103,41 @@ def _hermes_home_for_target_user(target_home_dir: str) -> str:
         return str(current_hermes)
 
 
+def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
+    """Build PATH directory list for service units, excluding non-existent dirs."""
+    if project_root is None:
+        project_root = PROJECT_ROOT
+
+    candidates = []
+
+    venv_bin = project_root / "venv" / "bin"
+    if venv_bin.is_dir():
+        candidates.append(str(venv_bin))
+    elif sys.prefix != sys.base_prefix:
+        candidates.append(str(Path(sys.prefix) / "bin"))
+
+    node_bin = project_root / "node_modules" / ".bin"
+    if node_bin.is_dir():
+        candidates.append(str(node_bin))
+
+    hermes_home = get_hermes_home()
+    hermes_node = hermes_home / "node" / "bin"
+    if hermes_node.is_dir():
+        candidates.append(str(hermes_node))
+    hermes_nm = hermes_home / "node_modules" / ".bin"
+    if hermes_nm.is_dir():
+        candidates.append(str(hermes_nm))
+
+    return candidates
+
+
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
     python_path = get_python_path()
     working_dir = str(PROJECT_ROOT)
     detected_venv = _detect_venv_dir()
     venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
-    venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin")
-    node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")
 
-    path_entries = [venv_bin, node_bin]
+    path_entries = _build_service_path_dirs()
     resolved_node = shutil.which("node")
     if resolved_node:
         resolved_node_dir = str(Path(resolved_node).resolve().parent)
@@ -2138,8 +2164,6 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
         python_path = _remap_path_for_user(python_path, home_dir)
         working_dir = _remap_path_for_user(working_dir, home_dir)
         venv_dir = _remap_path_for_user(venv_dir, home_dir)
-        venv_bin = _remap_path_for_user(venv_bin, home_dir)
-        node_bin = _remap_path_for_user(node_bin, home_dir)
         path_entries = [_remap_path_for_user(p, home_dir) for p in path_entries]
         path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
         path_entries.extend(_build_wsl_interop_paths(path_entries))
@@ -2754,12 +2778,10 @@ def generate_launchd_plist() -> str:
     # the systemd unit), then capture the user's full shell PATH so every
     # user-installed tool (node, ffmpeg, …) is reachable.
     detected_venv = _detect_venv_dir()
-    venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin")
     venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
-    node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")
     # Resolve the directory containing the node binary (e.g. Homebrew, nvm)
     # so it's explicitly in PATH even if the user's shell PATH changes later.
-    priority_dirs = [venv_bin, node_bin]
+    priority_dirs = _build_service_path_dirs()
     resolved_node = shutil.which("node")
     if resolved_node:
         resolved_node_dir = str(Path(resolved_node).resolve().parent)
diff --git a/tests/hermes_cli/test_gateway_service_paths.py b/tests/hermes_cli/test_gateway_service_paths.py
new file mode 100644
index 00000000000..71abc4aef24
--- /dev/null
+++ b/tests/hermes_cli/test_gateway_service_paths.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_service_path_skips_nonexistent_node_modules(tmp_path):
+    """Service PATH should not include node_modules/.bin if it doesn't exist."""
+    from hermes_cli.gateway import _build_service_path_dirs
+    with patch("hermes_cli.gateway.get_hermes_home", return_value=tmp_path / ".hermes"):
+        dirs = _build_service_path_dirs(project_root=tmp_path)
+    node_modules_bin = str(tmp_path / "node_modules" / ".bin")
+    assert node_modules_bin not in dirs
+
+
+def test_service_path_includes_node_modules_when_present(tmp_path):
+    """Service PATH should include node_modules/.bin when it exists."""
+    nm_bin = tmp_path / "node_modules" / ".bin"
+    nm_bin.mkdir(parents=True)
+    from hermes_cli.gateway import _build_service_path_dirs
+    with patch("hermes_cli.gateway.get_hermes_home", return_value=tmp_path / ".hermes"):
+        dirs = _build_service_path_dirs(project_root=tmp_path)
+    assert str(nm_bin) in dirs
+
+
+def test_service_path_includes_hermes_home_node_modules(tmp_path):
+    """Service PATH should include ~/.hermes/node_modules/.bin when it exists."""
+    hermes_nm = tmp_path / ".hermes" / "node_modules" / ".bin"
+    hermes_nm.mkdir(parents=True)
+    from hermes_cli.gateway import _build_service_path_dirs
+    with patch("hermes_cli.gateway.get_hermes_home", return_value=tmp_path / ".hermes"):
+        dirs = _build_service_path_dirs(project_root=tmp_path)
+    assert str(hermes_nm) in dirs

From b2bf658442f413a9a1d24b011589e5e38544947e Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:02:23 +0000
Subject: [PATCH 068/218] feat(tui): find bundled entry.js from wheel before
 falling back to npm build

Add _find_bundled_tui() that checks for hermes_cli/tui_dist/entry.js
(present in wheel installs) and wire it into _make_tui_argv() between
the HERMES_TUI_DIR prebuilt path and the npm install fallback.
---
 hermes_cli/main.py                   | 14 ++++++++++++++
 tests/hermes_cli/test_tui_bundled.py | 21 +++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 tests/hermes_cli/test_tui_bundled.py

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 7eedc3fd322..1324ff8e8e7 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1024,6 +1024,14 @@ def _ensure_tui_node() -> None:
     os.environ["PATH"] = os.pathsep.join(parts)
 
 
+def _find_bundled_tui(hermes_cli_dir: Path | None = None) -> Path | None:
+    """Find a pre-built TUI entry.js bundled in the wheel."""
+    if hermes_cli_dir is None:
+        hermes_cli_dir = Path(__file__).parent
+    bundled = hermes_cli_dir / "tui_dist" / "entry.js"
+    return bundled if bundled.is_file() else None
+
+
 def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
     """TUI: --dev → tsx src; else node dist (HERMES_TUI_DIR prebuilt or esbuild)."""
     _ensure_tui_node()
@@ -1058,6 +1066,12 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
                 node = _node_bin("node")
                 return [node, str(p / "dist" / "entry.js")], p
 
+        # 1b. Bundled in wheel (pip install)
+        bundled = _find_bundled_tui()
+        if bundled is not None:
+            node = _node_bin("node")
+            return [node, str(bundled)], bundled.parent
+
     # 2. Normal flow: npm install if needed, always esbuild, then node dist/entry.js.
     #    --dev flow: npm install if needed, then tsx src/entry.tsx (no build).
     if _tui_need_npm_install(tui_dir):
diff --git a/tests/hermes_cli/test_tui_bundled.py b/tests/hermes_cli/test_tui_bundled.py
new file mode 100644
index 00000000000..c49443a3f76
--- /dev/null
+++ b/tests/hermes_cli/test_tui_bundled.py
@@ -0,0 +1,21 @@
+from pathlib import Path
+
+
+def test_tui_finds_bundled_entry_js(tmp_path):
+    """_find_bundled_tui finds entry.js bundled in the package."""
+    tui_dist = tmp_path / "hermes_cli" / "tui_dist"
+    tui_dist.mkdir(parents=True)
+    entry = tui_dist / "entry.js"
+    entry.write_text("// bundled TUI", encoding="utf-8")
+
+    from hermes_cli.main import _find_bundled_tui
+    result = _find_bundled_tui(hermes_cli_dir=tmp_path / "hermes_cli")
+    assert result is not None
+    assert result.name == "entry.js"
+
+
+def test_tui_returns_none_when_no_bundle(tmp_path):
+    """_find_bundled_tui returns None when no bundle exists."""
+    from hermes_cli.main import _find_bundled_tui
+    result = _find_bundled_tui(hermes_cli_dir=tmp_path / "hermes_cli")
+    assert result is None

From 624ce11ee846b57b59ca2e031f34e25813137c4d Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:02:44 +0000
Subject: [PATCH 069/218] feat(config): detect pip install method and recommend
 correct update command

Adds detect_install_method() to identify nixos/homebrew/git/pip installs,
and recommended_update_command_for_method() to return the right upgrade command
for each method. Updates recommended_update_command() to use these for pip-installed
instances (no .git dir, not managed).
---
 hermes_cli/config.py                          | 33 ++++++++++++++++-
 .../hermes_cli/test_pip_install_detection.py  | 37 +++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 tests/hermes_cli/test_pip_install_detection.py

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index a560e1e6a1e..10dd7b46412 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -199,9 +199,40 @@ def get_managed_update_command() -> Optional[str]:
     return None
 
 
+def detect_install_method(project_root: Optional[Path] = None) -> str:
+    """Detect how Hermes was installed: 'nixos', 'homebrew', 'git', or 'pip'."""
+    managed = get_managed_system()
+    if managed:
+        return managed.lower().replace(" ", "-")
+    if project_root is None:
+        project_root = Path(__file__).parent.parent.resolve()
+    if (project_root / ".git").is_dir():
+        return "git"
+    return "pip"
+
+
+def recommended_update_command_for_method(method: str) -> str:
+    """Return the update command for a given install method."""
+    if method == "nixos":
+        return "sudo nixos-rebuild switch"
+    if method == "homebrew":
+        return "brew upgrade hermes-agent"
+    if method == "pip":
+        import shutil
+        uv = shutil.which("uv")
+        if uv:
+            return "uv pip install --upgrade hermes-agent"
+        return "pip install --upgrade hermes-agent"
+    return "hermes update"
+
+
 def recommended_update_command() -> str:
     """Return the best update command for the current installation."""
-    return get_managed_update_command() or "hermes update"
+    managed_cmd = get_managed_update_command()
+    if managed_cmd:
+        return managed_cmd
+    method = detect_install_method()
+    return recommended_update_command_for_method(method)
 
 
 def format_managed_message(action: str = "modify this Hermes installation") -> str:
diff --git a/tests/hermes_cli/test_pip_install_detection.py b/tests/hermes_cli/test_pip_install_detection.py
new file mode 100644
index 00000000000..b0f4cbd75ad
--- /dev/null
+++ b/tests/hermes_cli/test_pip_install_detection.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_pip_install_detected_when_no_git_dir(tmp_path):
+    """When PROJECT_ROOT has no .git, detect as pip install."""
+    with patch("hermes_cli.config.get_managed_system", return_value=None):
+        from hermes_cli.config import detect_install_method
+        method = detect_install_method(project_root=tmp_path)
+        assert method == "pip"
+
+
+def test_git_install_detected_when_git_dir_exists(tmp_path):
+    """When PROJECT_ROOT has .git, detect as git install."""
+    (tmp_path / ".git").mkdir()
+    with patch("hermes_cli.config.get_managed_system", return_value=None):
+        from hermes_cli.config import detect_install_method
+        method = detect_install_method(project_root=tmp_path)
+        assert method == "git"
+
+
+def test_managed_install_takes_precedence(tmp_path):
+    """When HERMES_MANAGED is set, that takes precedence over git detection."""
+    (tmp_path / ".git").mkdir()
+    with patch("hermes_cli.config.get_managed_system", return_value="NixOS"):
+        from hermes_cli.config import detect_install_method
+        method = detect_install_method(project_root=tmp_path)
+        assert method == "nixos"
+
+
+def test_recommended_update_command_pip():
+    """Pip installs recommend pip install --upgrade."""
+    from hermes_cli.config import recommended_update_command_for_method
+    cmd = recommended_update_command_for_method("pip")
+    assert "pip install" in cmd or "uv pip install" in cmd
+    assert "--upgrade" in cmd
+    assert "hermes-agent" in cmd

From 79afa50703d18f91fb7878a7b7a31b425ab40382 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:04:34 +0000
Subject: [PATCH 070/218] feat(update): support pip install --upgrade for PyPI
 installs

When .git is absent and detect_install_method returns "pip", fork
hermes update to run `uv pip install --upgrade hermes-agent` (or
`python -m pip install --upgrade hermes-agent` as fallback) instead of
hard-exiting with "Not a git repository".
---
 hermes_cli/main.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 1324ff8e8e7..ea050126736 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -7671,6 +7671,29 @@ def cmd_update(args):
         _finalize_update_output(_update_io_state)
 
 
+def _cmd_update_pip(args):
+    """Update Hermes via pip (for PyPI installs)."""
+    import subprocess as _sp
+    from hermes_cli import __version__
+
+    print(f"→ Current version: {__version__}")
+    print("→ Checking PyPI for updates...")
+
+    uv = shutil.which("uv")
+    if uv:
+        cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"]
+    else:
+        cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"]
+
+    print(f"→ Running: {' '.join(cmd)}")
+    result = _sp.run(cmd)
+    if result.returncode != 0:
+        print("✗ Update failed")
+        sys.exit(1)
+
+    print("✓ Update complete! Restart hermes to use the new version.")
+
+
 def _cmd_update_impl(args, gateway_mode: bool):
     """Body of ``cmd_update`` — kept separate so the wrapper can always
     restore stdio even on ``sys.exit``."""
@@ -7698,6 +7721,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
         if sys.platform == "win32":
             use_zip_update = True
         else:
+            from hermes_cli.config import detect_install_method
+            method = detect_install_method(PROJECT_ROOT)
+            if method == "pip":
+                _cmd_update_pip(args)
+                return
             print("✗ Not a git repository. Please reinstall:")
             print(
                 "  curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash"

From bea96e5cac3caf12885056fbc3a400cb5c008540 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:05:12 +0000
Subject: [PATCH 071/218] chore(config): expand ensure_hermes_home to create
 full directory scaffold

Match the full set of subdirs created by install.sh: pairing, hooks,
image_cache, audio_cache, and skills are now pre-created alongside the
existing cron, sessions, logs, logs/curator, and memories dirs. This
makes hermes doctor checks cleaner without changing any runtime behaviour.
---
 hermes_cli/config.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 10dd7b46412..508de0d3faa 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -432,7 +432,10 @@ def ensure_hermes_home():
     else:
         home.mkdir(parents=True, exist_ok=True)
         _secure_dir(home)
-        for subdir in ("cron", "sessions", "logs", "logs/curator", "memories"):
+        for subdir in (
+            "cron", "sessions", "logs", "logs/curator", "memories",
+            "pairing", "hooks", "image_cache", "audio_cache", "skills",
+        ):
             d = home / subdir
             d.mkdir(parents=True, exist_ok=True)
             _secure_dir(d)

From 259ae846c8ae1b84d4cbd2cb1d62c6eefd81957f Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:06:05 +0000
Subject: [PATCH 072/218] feat: add ensure_dependency() wrapper + ship
 install.sh in wheel

Includes paired change: browser tool now searches ~/.hermes/node_modules/.bin/
for agent-browser installed via install.sh --ensure browser.
---
 .github/workflows/upload_to_pypi.yml |  5 ++
 hermes_cli/dep_ensure.py             | 96 ++++++++++++++++++++++++++++
 pyproject.toml                       |  2 +-
 tests/hermes_cli/test_dep_ensure.py  | 43 +++++++++++++
 tools/browser_tool.py                |  3 +-
 5 files changed, 147 insertions(+), 2 deletions(-)
 create mode 100644 hermes_cli/dep_ensure.py
 create mode 100644 tests/hermes_cli/test_dep_ensure.py

diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml
index ae68ed034a1..9dce018d690 100644
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -71,6 +71,11 @@ jobs:
           test -f hermes_cli/web_dist/index.html || { echo "ERROR: web_dist not built"; exit 1; }
           test -f hermes_cli/tui_dist/entry.js || { echo "ERROR: tui_dist not built"; exit 1; }
 
+      - name: Bundle install.sh into wheel
+        run: |
+          mkdir -p hermes_cli/scripts
+          cp scripts/install.sh hermes_cli/scripts/install.sh
+
       - name: Build wheel and sdist
         run: uv build --sdist --wheel
 
diff --git a/hermes_cli/dep_ensure.py b/hermes_cli/dep_ensure.py
new file mode 100644
index 00000000000..03ddd80ef84
--- /dev/null
+++ b/hermes_cli/dep_ensure.py
@@ -0,0 +1,96 @@
+"""Lazy dependency bootstrapper for non-Python runtime deps.
+
+Wraps install.sh --ensure to install node, browser, ripgrep, ffmpeg
+on first use. Prompts interactively unless told not to.
+"""
+from __future__ import annotations
+
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+_DEP_CHECKS = {
+    "node": lambda: shutil.which("node") is not None,
+    "browser": lambda: (
+        shutil.which("agent-browser") is not None
+        or _has_system_browser()
+        or _has_hermes_agent_browser()
+    ),
+    "ripgrep": lambda: shutil.which("rg") is not None,
+    "ffmpeg": lambda: shutil.which("ffmpeg") is not None,
+}
+
+_DEP_DESCRIPTIONS = {
+    "node": "Node.js (required for browser tools and TUI)",
+    "browser": "Browser engine (Chromium, for web browsing tools)",
+    "ripgrep": "ripgrep (fast file search)",
+    "ffmpeg": "ffmpeg (TTS voice messages)",
+}
+
+
+def _has_system_browser() -> bool:
+    for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
+        if shutil.which(name):
+            return True
+    return False
+
+
+def _has_hermes_agent_browser() -> bool:
+    hermes_home = os.environ.get("HERMES_HOME", str(Path.home() / ".hermes"))
+    return (Path(hermes_home) / "node_modules" / ".bin" / "agent-browser").is_file()
+
+
+def _find_install_script(
+    package_dir: Path | None = None,
+    repo_root: Path | None = None,
+) -> Path | None:
+    """Locate install.sh — bundled in wheel or in git checkout."""
+    if package_dir is None:
+        package_dir = Path(__file__).parent
+    if repo_root is None:
+        repo_root = package_dir.parent
+
+    bundled = package_dir / "scripts" / "install.sh"
+    if bundled.is_file():
+        return bundled
+    repo = repo_root / "scripts" / "install.sh"
+    if repo.is_file():
+        return repo
+    return None
+
+
+def ensure_dependency(dep: str, interactive: bool = True) -> bool:
+    """Ensure a non-Python dependency is available. Returns True if available."""
+    check = _DEP_CHECKS.get(dep)
+    if check and check():
+        return True
+
+    script = _find_install_script()
+    if script is None:
+        if interactive:
+            desc = _DEP_DESCRIPTIONS.get(dep, dep)
+            print(f"  {desc} is not installed and install.sh was not found.")
+            print(f"  Install {dep} manually and try again.")
+        return False
+
+    if interactive and sys.stdin.isatty():
+        desc = _DEP_DESCRIPTIONS.get(dep, dep)
+        try:
+            reply = input(f"{desc} is not installed. Install now? [Y/n] ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            return False
+        if reply not in ("", "y", "yes"):
+            return False
+
+    result = subprocess.run(
+        ["bash", str(script), "--ensure", dep],
+        env={**os.environ, "IS_INTERACTIVE": "false"},
+    )
+    if result.returncode != 0:
+        return False
+
+    if check:
+        return check()
+    return True
diff --git a/pyproject.toml b/pyproject.toml
index 87674601db0..fff11f6a5d9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -210,7 +210,7 @@ hermes-acp = "acp_adapter.entry:main"
 py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
 
 [tool.setuptools.package-data]
-hermes_cli = ["web_dist/**/*", "tui_dist/**/*"]
+hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh"]
 gateway = ["assets/**/*"]
 acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"]
 
diff --git a/tests/hermes_cli/test_dep_ensure.py b/tests/hermes_cli/test_dep_ensure.py
new file mode 100644
index 00000000000..c980c290099
--- /dev/null
+++ b/tests/hermes_cli/test_dep_ensure.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_ensure_dependency_skips_when_present():
+    """ensure_dependency is a no-op when the dep is already available."""
+    from hermes_cli.dep_ensure import ensure_dependency
+    with patch("hermes_cli.dep_ensure.shutil") as mock_shutil:
+        mock_shutil.which.return_value = "/usr/bin/node"
+        result = ensure_dependency("node", interactive=False)
+        assert result is True
+
+
+def test_ensure_dependency_returns_false_when_missing_noninteractive():
+    """ensure_dependency returns False for missing dep in non-interactive mode."""
+    from hermes_cli.dep_ensure import ensure_dependency
+    with patch("hermes_cli.dep_ensure.shutil") as mock_shutil:
+        mock_shutil.which.return_value = None
+        with patch("hermes_cli.dep_ensure._find_install_script", return_value=None):
+            result = ensure_dependency("node", interactive=False)
+            assert result is False
+
+
+def test_find_install_script_from_checkout(tmp_path):
+    """_find_install_script finds scripts/install.sh in a git checkout."""
+    from hermes_cli.dep_ensure import _find_install_script
+    scripts_dir = tmp_path / "scripts"
+    scripts_dir.mkdir()
+    (scripts_dir / "install.sh").write_text("#!/bin/bash", encoding="utf-8")
+    result = _find_install_script(package_dir=tmp_path / "hermes_cli", repo_root=tmp_path)
+    assert result is not None
+    assert result.name == "install.sh"
+
+
+def test_find_install_script_from_wheel(tmp_path):
+    """_find_install_script finds bundled install.sh in a wheel."""
+    from hermes_cli.dep_ensure import _find_install_script
+    bundled = tmp_path / "hermes_cli" / "scripts"
+    bundled.mkdir(parents=True)
+    (bundled / "install.sh").write_text("#!/bin/bash", encoding="utf-8")
+    result = _find_install_script(package_dir=tmp_path / "hermes_cli", repo_root=tmp_path)
+    assert result is not None
+    assert result.name == "install.sh"
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index 575beba6c02..c01d25a6f0b 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -144,7 +144,8 @@ def _browser_candidate_path_dirs() -> list[str]:
     """Return ordered browser CLI PATH candidates shared by discovery and execution."""
     hermes_home = get_hermes_home()
     hermes_node_bin = str(hermes_home / "node" / "bin")
-    return [hermes_node_bin, *list(_discover_homebrew_node_dirs()), *_SANE_PATH_DIRS]
+    hermes_nm_bin = str(hermes_home / "node_modules" / ".bin")
+    return [hermes_node_bin, hermes_nm_bin, *list(_discover_homebrew_node_dirs()), *_SANE_PATH_DIRS]
 
 
 def _merge_browser_path(existing_path: str = "") -> str:

From 96917fb74ae4b9857671f7addb957db0774e4c9f Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 12:38:04 +0000
Subject: [PATCH 073/218] =?UTF-8?q?refactor:=20fix=20review=20findings=20?=
 =?UTF-8?q?=E2=80=94=20remove=20duplicate=20imports=20and=20deduplicate=20?=
 =?UTF-8?q?update=20command?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- banner.py: remove redundant `import json as _json` (json already at module level)
- main.py: _cmd_update_pip now delegates to recommended_update_command_for_method
  instead of duplicating the uv-vs-pip detection logic
- main.py: remove redundant `import subprocess as _sp` (subprocess already at module level)
---
 hermes_cli/banner.py |  3 +--
 hermes_cli/main.py   | 13 ++++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index 061992b4746..077ee41f0a2 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -190,11 +190,10 @@ def _fetch_pypi_latest(package: str = "hermes-agent") -> Optional[str]:
     """Fetch the latest version of a package from PyPI. Returns None on failure."""
     try:
         import urllib.request
-        import json as _json
         url = f"https://pypi.org/pypi/{package}/json"
         req = urllib.request.Request(url, headers={"Accept": "application/json"})
         with urllib.request.urlopen(req, timeout=5) as resp:
-            data = _json.loads(resp.read())
+            data = json.loads(resp.read())
             return data.get("info", {}).get("version")
     except Exception:
         return None
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index ea050126736..95947641aa5 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -7673,20 +7673,19 @@ def cmd_update(args):
 
 def _cmd_update_pip(args):
     """Update Hermes via pip (for PyPI installs)."""
-    import subprocess as _sp
     from hermes_cli import __version__
+    from hermes_cli.config import recommended_update_command_for_method
 
     print(f"→ Current version: {__version__}")
     print("→ Checking PyPI for updates...")
 
-    uv = shutil.which("uv")
-    if uv:
-        cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"]
-    else:
-        cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"]
+    cmd_str = recommended_update_command_for_method("pip")
+    cmd = cmd_str.split()
+    if cmd[0] == "pip":
+        cmd = [sys.executable, "-m", "pip"] + cmd[1:]
 
     print(f"→ Running: {' '.join(cmd)}")
-    result = _sp.run(cmd)
+    result = subprocess.run(cmd)
     if result.returncode != 0:
         print("✗ Update failed")
         sys.exit(1)

From 55a7c45d379f288fb6dc0eb4e484e82b73471b2c Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:01:37 +0000
Subject: [PATCH 074/218] fix(update): handle --check for pip installs (missed
 code path)

_cmd_update_check() had its own `.git` gate separate from _cmd_update_impl.
For pip installs, fork to _check_via_pypi() and display the result with
the correct recommended_update_command().
---
 hermes_cli/main.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 95947641aa5..bb372c396f1 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -7396,6 +7396,19 @@ def _cmd_update_check():
     """Implement ``hermes update --check``: fetch and report without installing."""
     git_dir = PROJECT_ROOT / ".git"
     if not git_dir.exists():
+        from hermes_cli.config import detect_install_method, recommended_update_command
+        if detect_install_method(PROJECT_ROOT) == "pip":
+            from hermes_cli.banner import _check_via_pypi
+            result = _check_via_pypi()
+            if result is None:
+                print("✗ Could not reach PyPI to check for updates.")
+                sys.exit(1)
+            elif result == 0:
+                print("✓ Already up to date.")
+            else:
+                print(f"⚕ Update available on PyPI.")
+                print(f"  Run '{recommended_update_command()}' to install.")
+            return
         print("✗ Not a git repository — cannot check for updates.")
         sys.exit(1)
 

From e38a478c05e84f7fe563a1c9e980a0cebc8e4d02 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:22:21 +0000
Subject: [PATCH 075/218] chore(ci): pin actions/setup-node to SHA for
 supply-chain consistency

---
 .github/workflows/upload_to_pypi.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/upload_to_pypi.yml b/.github/workflows/upload_to_pypi.yml
index 9dce018d690..95477ccf01f 100644
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -51,7 +51,7 @@ jobs:
         uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e  # v6
 
       - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
         with:
           node-version: '22'
 

From c57709a3d68e7972bbc7180a1d6811f5f38546d1 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:28:21 +0000
Subject: [PATCH 076/218] feat: wire ensure_dependency into TUI and browser
 tool call sites
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before: missing node → hard exit; missing browser → FileNotFoundError.
After: both try ensure_dependency() first, which prompts interactively
and delegates installation to install.sh --ensure.

ripgrep and ffmpeg already degrade gracefully (grep fallback, skip
conversion) so they don't need wiring.

Also documents the design rationale in dep_ensure.py: detection and
prompting live in Python (portable, instant, UX-integrated); only
the actual installation delegates to install.sh (1900 lines of
battle-tested OS/package-manager logic).
---
 hermes_cli/dep_ensure.py | 14 ++++++++++++--
 hermes_cli/main.py       |  7 +++++++
 tools/browser_tool.py    | 18 +++++++++++++++++-
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/hermes_cli/dep_ensure.py b/hermes_cli/dep_ensure.py
index 03ddd80ef84..c06fc6db80a 100644
--- a/hermes_cli/dep_ensure.py
+++ b/hermes_cli/dep_ensure.py
@@ -1,7 +1,17 @@
 """Lazy dependency bootstrapper for non-Python runtime deps.
 
-Wraps install.sh --ensure to install node, browser, ripgrep, ffmpeg
-on first use. Prompts interactively unless told not to.
+Detection and prompting live here in Python — not in install.sh — because:
+  1. shutil.which() works on every platform; install.sh needs bash.
+  2. Detection is instant; spawning bash for a "is node installed?" check is waste.
+  3. Python controls the UX (rich prompts, non-interactive fallback, TTY detection).
+
+install.sh is still the *installation* backend because it has 1900 lines of
+battle-tested OS detection and package-manager logic (apt/brew/pacman/dnf/
+zypper/Termux/…).  Reimplementing that in Python would be huge duplication.
+
+Deps that degrade gracefully (ripgrep → grep fallback, ffmpeg → skip conversion)
+don't need ensure_dependency wired in — only hard-fail sites do (TUI needs node,
+browser tool needs agent-browser).
 """
 from __future__ import annotations
 
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index bb372c396f1..0b5e79fe9d9 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1042,6 +1042,13 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
             if env_node and os.path.isfile(env_node) and os.access(env_node, os.X_OK):
                 return env_node
         path = shutil.which(bin)
+        if not path and bin == "node":
+            try:
+                from hermes_cli.dep_ensure import ensure_dependency
+                if ensure_dependency("node"):
+                    path = shutil.which("node")
+            except Exception:
+                pass
         if not path:
             print(f"{bin} not found — install Node.js to use the TUI.")
             sys.exit(1)
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index c01d25a6f0b..b3eb24ee044 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -1703,7 +1703,23 @@ def _find_agent_browser() -> str:
         _agent_browser_resolved = True
         return _cached_agent_browser
 
-    # Nothing found — cache the failure so subsequent calls don't re-scan.
+    # Nothing found — try lazy installation before giving up.
+    try:
+        from hermes_cli.dep_ensure import ensure_dependency
+        if ensure_dependency("browser"):
+            recheck = shutil.which("agent-browser")
+            if not recheck and extended_path:
+                recheck = shutil.which("agent-browser", path=extended_path)
+            if not recheck:
+                hermes_nm = str(get_hermes_home() / "node_modules" / ".bin")
+                recheck = shutil.which("agent-browser", path=hermes_nm)
+            if recheck:
+                _cached_agent_browser = recheck
+                _agent_browser_resolved = True
+                return recheck
+    except Exception:
+        pass
+
     _agent_browser_resolved = True
     raise FileNotFoundError(
         "agent-browser CLI not found. Install it with: "

From b1edf3dfc8948b5ff93f42d26395fa6f30393d9f Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:33:35 +0000
Subject: [PATCH 077/218] chore: gitignore hermes_cli/scripts/ (bundled at
 wheel build time)

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 6ae86265a60..37b1f602cc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,6 @@ mini-swe-agent/
 result
 website/static/api/skills-index.json
 models-dev-upstream/
+hermes_cli/tui_dist/*
+hermes_cli/scripts/
+docs/superpowers/*
\ No newline at end of file

From 99b81cd54b99d4c66812b1d076e593f566432065 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:43:20 +0000
Subject: [PATCH 078/218] feat: add `hermes postinstall` command for pip users

One-shot bootstrap that installs non-Python deps (node, browser,
ripgrep, ffmpeg) via ensure_dependency(), then runs setup if no
provider is configured. Closes the gap between `pip install` and
the full user-facing experience.

Also fixes 3 pre-existing test regressions caused by earlier commits:
- test_recommended_update_command: mock detect_install_method for git env
- test_check_for_updates_no_git_dir: now falls back to PyPI, not None
- test_plist_path_includes_node_modules_bin: skip when dir absent
---
 hermes_cli/main.py                            | 31 ++++++++++++++++++-
 tests/hermes_cli/test_managed_installs.py     |  3 +-
 tests/hermes_cli/test_update_check.py         |  7 +++--
 .../hermes_cli/test_update_gateway_restart.py |  5 ++-
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 0b5e79fe9d9..121b77b0f91 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1713,6 +1713,24 @@ def cmd_setup(args):
     run_setup_wizard(args)
 
 
+def cmd_postinstall(args):
+    """One-shot bootstrap for pip users: install non-Python deps + run setup."""
+    from hermes_cli.dep_ensure import ensure_dependency
+
+    print("⚕ Hermes post-install bootstrap")
+    print()
+
+    for dep in ("node", "browser", "ripgrep", "ffmpeg"):
+        ensure_dependency(dep)
+
+    if not _has_any_provider_configured():
+        print()
+        cmd_setup(args)
+    else:
+        print()
+        print("✓ Post-install complete.")
+
+
 def cmd_model(args):
     """Select default model — starts with provider selection, then model picker."""
     _require_tty("model")
@@ -9583,7 +9601,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
         "config", "cron", "curator", "dashboard", "debug", "doctor",
         "dump", "fallback", "gateway", "hooks", "import", "insights",
         "kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
-        "model", "pairing", "plugins", "profile", "proxy", "sessions", "setup",
+        "model", "pairing", "plugins", "postinstall", "profile", "proxy", "sessions", "setup",
         "skills", "slack", "status", "tools", "uninstall", "update",
         "version", "webhook", "whatsapp", "chat",
         # Help-ish invocations — plugin commands not being listed in
@@ -10022,6 +10040,17 @@ def main():
     )
     setup_parser.set_defaults(func=cmd_setup)
 
+    # =========================================================================
+    # postinstall command
+    # =========================================================================
+    postinstall_parser = subparsers.add_parser(
+        "postinstall",
+        help="Bootstrap non-Python deps for pip installs (node, browser, ripgrep, ffmpeg)",
+        description="One-shot post-install for pip users. Installs system "
+        "dependencies that pip cannot provide, then runs setup if needed.",
+    )
+    postinstall_parser.set_defaults(func=cmd_postinstall)
+
     # =========================================================================
     # whatsapp command
     # =========================================================================
diff --git a/tests/hermes_cli/test_managed_installs.py b/tests/hermes_cli/test_managed_installs.py
index c6b5d792ce0..d2cf2947c6d 100644
--- a/tests/hermes_cli/test_managed_installs.py
+++ b/tests/hermes_cli/test_managed_installs.py
@@ -29,7 +29,8 @@ def test_format_managed_message_homebrew(monkeypatch):
 def test_recommended_update_command_defaults_to_hermes_update(monkeypatch):
     monkeypatch.delenv("HERMES_MANAGED", raising=False)
 
-    assert recommended_update_command() == "hermes update"
+    with patch("hermes_cli.config.detect_install_method", return_value="git"):
+        assert recommended_update_command() == "hermes update"
 
 
 def test_cmd_update_blocks_managed_homebrew(monkeypatch, capsys):
diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py
index 2bdc9b24621..92cd2d2e14c 100644
--- a/tests/hermes_cli/test_update_check.py
+++ b/tests/hermes_cli/test_update_check.py
@@ -59,7 +59,7 @@ def test_check_for_updates_expired_cache(tmp_path, monkeypatch):
 
 
 def test_check_for_updates_no_git_dir(tmp_path, monkeypatch):
-    """Returns None when .git directory doesn't exist anywhere."""
+    """Falls back to PyPI check when .git directory doesn't exist anywhere."""
     import hermes_cli.banner as banner
 
     # Create a fake banner.py so the fallback path also has no .git
@@ -70,8 +70,9 @@ def test_check_for_updates_no_git_dir(tmp_path, monkeypatch):
     monkeypatch.setattr(banner, "__file__", str(fake_banner))
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     with patch("hermes_cli.banner.subprocess.run") as mock_run:
-        result = banner.check_for_updates()
-    assert result is None
+        with patch("hermes_cli.banner._check_via_pypi", return_value=0):
+            result = banner.check_for_updates()
+    assert result == 0
     mock_run.assert_not_called()
 
 
diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py
index 34c878eca79..b53b1463624 100644
--- a/tests/hermes_cli/test_update_gateway_restart.py
+++ b/tests/hermes_cli/test_update_gateway_restart.py
@@ -178,8 +178,11 @@ class TestLaunchdPlistPath:
             raise AssertionError("PATH key not found in plist")
 
     def test_plist_path_includes_node_modules_bin(self):
+        node_bin_dir = gateway_cli.PROJECT_ROOT / "node_modules" / ".bin"
+        if not node_bin_dir.is_dir():
+            pytest.skip("node_modules/.bin not present in this checkout")
         plist = gateway_cli.generate_launchd_plist()
-        node_bin = str(gateway_cli.PROJECT_ROOT / "node_modules" / ".bin")
+        node_bin = str(node_bin_dir)
         lines = plist.splitlines()
         for i, line in enumerate(lines):
             if "<key>PATH</key>" in line.strip():

From 164a77dec9b74955c17401e9cf79f5470960b015 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:45:46 +0000
Subject: [PATCH 079/218] docs: add pip install path to installation,
 quickstart, updating, and CLI reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document pip install hermes-agent as a first-class install option.
Clarify that PyPI releases track tagged versions (major/minor),
not every commit on main — git installer is for bleeding-edge.
---
 website/docs/getting-started/installation.md | 32 ++++++++++++++++++--
 website/docs/getting-started/quickstart.md   | 10 +++++-
 website/docs/getting-started/updating.md     | 30 ++++++++++++++++--
 website/docs/reference/cli-commands.md       |  4 ++-
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index c8db40a9137..14bd95151c1 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -10,7 +10,30 @@ Get Hermes Agent up and running in under two minutes with the one-line installer
 
 ## Quick Install
 
-### Linux / macOS / WSL2
+### pip (recommended for most users)
+
+```bash
+pip install hermes-agent
+```
+
+This gives you the full Hermes Agent — CLI, web dashboard, and TUI — with zero external dependencies for core usage. Node.js, browser engines, and other optional tools are bootstrapped lazily on first use (e.g. when you run `hermes --tui` or use browser tools).
+
+PyPI releases track **tagged versions** (major and minor releases), not every commit on `main`. If you want bleeding-edge changes as they land, use the git install below.
+
+After installing, run:
+
+```bash
+hermes setup   # interactive wizard — configures your LLM provider and API key
+hermes         # start chatting
+```
+
+:::tip
+If you have [uv](https://docs.astral.sh/uv/) installed, `uv pip install hermes-agent` is faster.
+:::
+
+### One-Line Installer (Linux / macOS / WSL2)
+
+For a git-based install that tracks `main` and gives you the latest changes immediately:
 
 ```bash
 curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
@@ -80,7 +103,8 @@ Where the installer puts things depends on whether you're installing as a normal
 
 | Installer | Code lives at | `hermes` binary | Data directory |
 |---|---|---|---|
-| Per-user (normal) | `~/.hermes/hermes-agent/` | `~/.local/bin/hermes` (symlink) | `~/.hermes/` |
+| pip install | Python site-packages | `~/.local/bin/hermes` (console_scripts) | `~/.hermes/` |
+| Per-user (git installer) | `~/.hermes/hermes-agent/` | `~/.local/bin/hermes` (symlink) | `~/.hermes/` |
 | Root-mode (`sudo curl … \| sudo bash`) | `/usr/local/lib/hermes-agent/` | `/usr/local/bin/hermes` | `/root/.hermes/` (or `$HERMES_HOME`) |
 
 The root-mode **FHS layout** (`/usr/local/lib/…`, `/usr/local/bin/hermes`) matches where other system-wide developer tools land on Linux. It's useful for shared-machine deployments where one system install should serve every user. Per-user config (auth, skills, sessions) still lives under each user's `~/.hermes/` or explicit `HERMES_HOME`.
@@ -108,7 +132,9 @@ hermes setup          # Or run the full setup wizard to configure everything at
 
 ## Prerequisites
 
-The only prerequisite is **Git**. The installer automatically handles everything else:
+**pip install:** No prerequisites beyond Python 3.11+. Everything else is handled automatically.
+
+**Git installer:** The only prerequisite is **Git**. The installer automatically handles everything else:
 
 - **uv** (fast Python package manager)
 - **Python 3.11** (via uv, no sudo needed)
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index f5a089ee724..341618c614c 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -48,7 +48,15 @@ Pick the row that matches your goal:
 
 ## 1. Install Hermes Agent
 
-Run the one-line installer:
+**Option A — pip (simplest):**
+
+```bash
+pip install hermes-agent
+```
+
+PyPI releases track tagged versions (major/minor releases), not every commit on `main`. For bleeding-edge, use Option B.
+
+**Option B — git installer (tracks main branch):**
 
 ```bash
 # Linux / macOS / WSL2 / Android (Termux)
diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md
index aa2a426db99..83b3eb3221c 100644
--- a/website/docs/getting-started/updating.md
+++ b/website/docs/getting-started/updating.md
@@ -8,19 +8,36 @@ description: "How to update Hermes Agent to the latest version or uninstall it"
 
 ## Updating
 
+### Git installs
+
 Update to the latest version with a single command:
 
 ```bash
 hermes update
 ```
 
-This pulls the latest code, updates dependencies, and prompts you to configure any new options that were added since your last update.
+This pulls the latest code from `main`, updates dependencies, and prompts you to configure any new options that were added since your last update.
+
+### pip installs
+
+PyPI releases track **tagged versions** (major and minor releases), not every commit on `main`. Check for updates and upgrade with:
+
+```bash
+hermes update --check    # see if a newer release is on PyPI
+hermes update            # runs pip install --upgrade hermes-agent
+```
+
+Or manually:
+
+```bash
+pip install --upgrade hermes-agent    # or: uv pip install --upgrade hermes-agent
+```
 
 :::tip
 `hermes update` automatically detects new configuration options and prompts you to add them. If you skipped that prompt, you can manually run `hermes config check` to see missing options, then `hermes config migrate` to interactively add them.
 :::
 
-### What happens during an update
+### What happens during an update (git installs)
 
 When you run `hermes update`, the following steps occur:
 
@@ -189,12 +206,21 @@ See [Nix Setup](./nix-setup.md) for more details.
 
 ## Uninstalling
 
+### Git installs
+
 ```bash
 hermes uninstall
 ```
 
 The uninstaller gives you the option to keep your configuration files (`~/.hermes/`) for a future reinstall.
 
+### pip installs
+
+```bash
+pip uninstall hermes-agent
+rm -rf ~/.hermes            # Optional — keep if you plan to reinstall
+```
+
 ### Manual Uninstall
 
 ```bash
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index aa12f431b62..3b5b7d2e925 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -76,7 +76,7 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes profile` | Manage profiles — multiple isolated Hermes instances. |
 | `hermes completion` | Print shell completion scripts (bash/zsh/fish). |
 | `hermes version` | Show version information. |
-| `hermes update` | Pull latest code and reinstall dependencies. `--check` prints commit diff without pulling; `--backup` takes a pre-pull `HERMES_HOME` snapshot. |
+| `hermes update` | Pull latest code and reinstall dependencies (git installs), or check PyPI and `pip install --upgrade` (pip installs). `--check` previews without installing; `--backup` takes a pre-pull `HERMES_HOME` snapshot. |
 | `hermes uninstall` | Remove Hermes from the system. |
 
 ## `hermes chat`
@@ -1188,6 +1188,8 @@ hermes update [--check] [--backup] [--restart-gateway]
 
 Pulls the latest `hermes-agent` code and reinstalls dependencies in your venv, then re-runs the post-install hooks (MCP servers, skills sync, completion install). Safe to run on a live install.
 
+**pip installs:** `hermes update` detects pip-based installations automatically — it queries PyPI for the latest release and runs `pip install --upgrade hermes-agent` instead of `git pull`. PyPI releases track tagged versions (major/minor releases), not every commit on `main`. Use `--check` to see if a newer PyPI release is available without installing.
+
 | Option | Description |
 |--------|-------------|
 | `--check` | Print the current commit and the latest `origin/main` commit side by side, and exit 0 if in sync or 1 if behind. Does not pull, install, or restart anything. |

From 47c0efe1c08ba6f0a70d07b7f353e1ad71e69678 Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:47:43 +0000
Subject: [PATCH 080/218] refactor: DRY cleanup from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- dep_ensure.py: use get_hermes_home() instead of hand-rolled env var
- dep_ensure.py: add "chrome" to browser name list (was inconsistent with browser_tool.py)
- main.py _cmd_update_check: use detect_install_method() directly instead of redundant .git check
- main.py _cmd_update_pip: build command list directly instead of fragile split() on display string
- banner.py: rename _check_via_pypi → check_via_pypi (cross-module public API)
---
 hermes_cli/banner.py                       |  4 +--
 hermes_cli/dep_ensure.py                   |  6 ++--
 hermes_cli/main.py                         | 39 ++++++++++++----------
 tests/hermes_cli/test_banner_pip_update.py | 24 ++++++-------
 tests/hermes_cli/test_update_check.py      |  2 +-
 5 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index 077ee41f0a2..ef592beb7fd 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -199,7 +199,7 @@ def _fetch_pypi_latest(package: str = "hermes-agent") -> Optional[str]:
         return None
 
 
-def _check_via_pypi() -> Optional[int]:
+def check_via_pypi() -> Optional[int]:
     """Compare installed version against PyPI latest.
 
     Returns 0 if up-to-date, 1 if behind, None on failure.
@@ -255,7 +255,7 @@ def check_for_updates() -> Optional[int]:
         if not (repo_dir / ".git").exists():
             repo_dir = hermes_home / "hermes-agent"
         if not (repo_dir / ".git").exists():
-            behind = _check_via_pypi()
+            behind = check_via_pypi()
         else:
             behind = _check_via_local_git(repo_dir)
 
diff --git a/hermes_cli/dep_ensure.py b/hermes_cli/dep_ensure.py
index c06fc6db80a..3312726c36d 100644
--- a/hermes_cli/dep_ensure.py
+++ b/hermes_cli/dep_ensure.py
@@ -41,15 +41,15 @@ _DEP_DESCRIPTIONS = {
 
 
 def _has_system_browser() -> bool:
-    for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
+    for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser", "chrome"):
         if shutil.which(name):
             return True
     return False
 
 
 def _has_hermes_agent_browser() -> bool:
-    hermes_home = os.environ.get("HERMES_HOME", str(Path.home() / ".hermes"))
-    return (Path(hermes_home) / "node_modules" / ".bin" / "agent-browser").is_file()
+    from hermes_constants import get_hermes_home
+    return (get_hermes_home() / "node_modules" / ".bin" / "agent-browser").is_file()
 
 
 def _find_install_script(
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 121b77b0f91..41c4a23f932 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -7419,21 +7419,24 @@ def _finalize_update_output(state):
 
 def _cmd_update_check():
     """Implement ``hermes update --check``: fetch and report without installing."""
+    from hermes_cli.config import detect_install_method
+    method = detect_install_method(PROJECT_ROOT)
+    if method == "pip":
+        from hermes_cli.config import recommended_update_command
+        from hermes_cli.banner import check_via_pypi
+        result = check_via_pypi()
+        if result is None:
+            print("✗ Could not reach PyPI to check for updates.")
+            sys.exit(1)
+        elif result == 0:
+            print("✓ Already up to date.")
+        else:
+            print("⚕ Update available on PyPI.")
+            print(f"  Run '{recommended_update_command()}' to install.")
+        return
+
     git_dir = PROJECT_ROOT / ".git"
     if not git_dir.exists():
-        from hermes_cli.config import detect_install_method, recommended_update_command
-        if detect_install_method(PROJECT_ROOT) == "pip":
-            from hermes_cli.banner import _check_via_pypi
-            result = _check_via_pypi()
-            if result is None:
-                print("✗ Could not reach PyPI to check for updates.")
-                sys.exit(1)
-            elif result == 0:
-                print("✓ Already up to date.")
-            else:
-                print(f"⚕ Update available on PyPI.")
-                print(f"  Run '{recommended_update_command()}' to install.")
-            return
         print("✗ Not a git repository — cannot check for updates.")
         sys.exit(1)
 
@@ -7712,15 +7715,15 @@ def cmd_update(args):
 def _cmd_update_pip(args):
     """Update Hermes via pip (for PyPI installs)."""
     from hermes_cli import __version__
-    from hermes_cli.config import recommended_update_command_for_method
 
     print(f"→ Current version: {__version__}")
     print("→ Checking PyPI for updates...")
 
-    cmd_str = recommended_update_command_for_method("pip")
-    cmd = cmd_str.split()
-    if cmd[0] == "pip":
-        cmd = [sys.executable, "-m", "pip"] + cmd[1:]
+    uv = shutil.which("uv")
+    if uv:
+        cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"]
+    else:
+        cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"]
 
     print(f"→ Running: {' '.join(cmd)}")
     result = subprocess.run(cmd)
diff --git a/tests/hermes_cli/test_banner_pip_update.py b/tests/hermes_cli/test_banner_pip_update.py
index a0e9266f698..205c97488a9 100644
--- a/tests/hermes_cli/test_banner_pip_update.py
+++ b/tests/hermes_cli/test_banner_pip_update.py
@@ -1,29 +1,29 @@
 from unittest.mock import patch
 
 
-def test_check_via_pypi_detects_update():
-    """_check_via_pypi returns 1 when PyPI has newer version."""
-    from hermes_cli.banner import _check_via_pypi
+def testcheck_via_pypi_detects_update():
+    """check_via_pypi returns 1 when PyPI has newer version."""
+    from hermes_cli.banner import check_via_pypi
     with patch("hermes_cli.banner.VERSION", "0.12.0"):
         with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"):
-            result = _check_via_pypi()
+            result = check_via_pypi()
             assert result == 1
 
 
-def test_check_via_pypi_up_to_date():
-    """_check_via_pypi returns 0 when versions match."""
-    from hermes_cli.banner import _check_via_pypi
+def testcheck_via_pypi_up_to_date():
+    """check_via_pypi returns 0 when versions match."""
+    from hermes_cli.banner import check_via_pypi
     with patch("hermes_cli.banner.VERSION", "0.13.0"):
         with patch("hermes_cli.banner._fetch_pypi_latest", return_value="0.13.0"):
-            result = _check_via_pypi()
+            result = check_via_pypi()
             assert result == 0
 
 
-def test_check_via_pypi_network_failure():
-    """_check_via_pypi returns None on network error."""
-    from hermes_cli.banner import _check_via_pypi
+def testcheck_via_pypi_network_failure():
+    """check_via_pypi returns None on network error."""
+    from hermes_cli.banner import check_via_pypi
     with patch("hermes_cli.banner._fetch_pypi_latest", return_value=None):
-        result = _check_via_pypi()
+        result = check_via_pypi()
         assert result is None
 
 
diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py
index 92cd2d2e14c..8a68d6a178d 100644
--- a/tests/hermes_cli/test_update_check.py
+++ b/tests/hermes_cli/test_update_check.py
@@ -70,7 +70,7 @@ def test_check_for_updates_no_git_dir(tmp_path, monkeypatch):
     monkeypatch.setattr(banner, "__file__", str(fake_banner))
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     with patch("hermes_cli.banner.subprocess.run") as mock_run:
-        with patch("hermes_cli.banner._check_via_pypi", return_value=0):
+        with patch("hermes_cli.banner.check_via_pypi", return_value=0):
             result = banner.check_for_updates()
     assert result == 0
     mock_run.assert_not_called()

From a480d345e63b114e9de1e9ceed746b7b9e21f0cb Mon Sep 17 00:00:00 2001
From: alt-glitch <balyan.sid@gmail.com>
Date: Fri, 15 May 2026 13:49:58 +0000
Subject: [PATCH 081/218] docs: add hermes postinstall to installation +
 quickstart, fix update --check description

- installation.md: add tip about `hermes postinstall` for upfront dep install
- quickstart.md: show `hermes postinstall` in pip install flow
- updating.md: fix --check description to mention PyPI path for pip installs
---
 website/docs/getting-started/installation.md | 4 ++++
 website/docs/getting-started/quickstart.md   | 1 +
 website/docs/getting-started/updating.md     | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index 14bd95151c1..a88f4c8bd1c 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -27,6 +27,10 @@ hermes setup   # interactive wizard — configures your LLM provider and API key
 hermes         # start chatting
 ```
 
+:::tip Optional: install everything upfront
+`hermes postinstall` installs Node.js, browser engines, ripgrep, and ffmpeg in one shot — then runs the setup wizard. Use this if you want the full experience (TUI, browser tools, voice) without waiting for lazy installs on first use.
+:::
+
 :::tip
 If you have [uv](https://docs.astral.sh/uv/) installed, `uv pip install hermes-agent` is faster.
 :::
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index 341618c614c..80eaf3589ca 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -52,6 +52,7 @@ Pick the row that matches your goal:
 
 ```bash
 pip install hermes-agent
+hermes postinstall     # optional: installs Node.js, browser, ripgrep, ffmpeg + runs setup
 ```
 
 PyPI releases track tagged versions (major/minor releases), not every commit on `main`. For bleeding-edge, use Option B.
diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md
index 83b3eb3221c..d4ced41a4d7 100644
--- a/website/docs/getting-started/updating.md
+++ b/website/docs/getting-started/updating.md
@@ -49,7 +49,7 @@ When you run `hermes update`, the following steps occur:
 
 ### Preview-only: `hermes update --check`
 
-Want to know if you're behind `origin/main` before actually pulling? Run `hermes update --check` — it fetches, prints your local commit and the latest remote commit side-by-side, and exits `0` if in sync or `1` if behind. No files are modified, no gateway is restarted. Useful in scripts and cron jobs that gate on "is there an update".
+Want to know if an update is available before pulling? Run `hermes update --check` — for git installs it fetches and compares commits against `origin/main`; for pip installs it queries PyPI for the latest release. No files are modified, no gateway is restarted. Useful in scripts and cron jobs that gate on "is there an update".
 
 ### Full pre-update backup: `--backup`
 

From 233d4170cf7b6421939d4ae2d7adc8f3466c347f Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:45:59 -0700
Subject: [PATCH 082/218] docs(xai): link OAuth-over-SSH guide from xAI
 provider surfaces (#26610)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to #26592. The new docs/guides/oauth-over-ssh.md page was
linked from the two SSH-specific sections of the xAI Grok OAuth guide
but was missing from the surfaces a user is more likely to hit first:

- guides/xai-grok-oauth.md 'See Also' — add the SSH guide at the top
  with a short qualifier so remote users notice it before clicking
  through.
- integrations/providers.md xAI Grok OAuth callout — append the SSH
  guide link alongside the existing xAI OAuth guide link.
- user-guide/configuration.md xai-oauth tip — same.

Docs build: zero warnings on touched files.
---
 website/docs/guides/xai-grok-oauth.md    | 1 +
 website/docs/integrations/providers.md   | 2 +-
 website/docs/user-guide/configuration.md | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md
index 95167a2430c..67d31c929ad 100644
--- a/website/docs/guides/xai-grok-oauth.md
+++ b/website/docs/guides/xai-grok-oauth.md
@@ -221,6 +221,7 @@ This clears both the singleton OAuth entry in `auth.json` and any credential-poo
 
 ## See Also
 
+- [OAuth over SSH / Remote Hosts](./oauth-over-ssh.md) — required reading if Hermes is on a different machine than your browser
 - [AI Providers reference](../integrations/providers.md)
 - [Environment Variables](../reference/environment-variables.md)
 - [Configuration](../user-guide/configuration.md)
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index e7b2e5ab86d..248d17c5fac 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -331,7 +331,7 @@ When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoin
 
 xAI is wired through the Responses API (`codex_responses` transport) for automatic reasoning support on Grok 4 models — no `reasoning_effort` parameter needed, the server reasons by default. Set `XAI_API_KEY` in `~/.hermes/.env` and pick xAI in `hermes model`, or drop `grok` as a shortcut into `/model grok-4-1-fast-reasoning`.
 
-SuperGrok subscribers can sign in with browser OAuth instead of using an API key — pick **xAI Grok OAuth (SuperGrok Subscription)** in `hermes model`, or run `hermes auth add xai-oauth`. The same OAuth bearer token is automatically reused by direct-to-xAI tools (TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md) for the full flow.
+SuperGrok subscribers can sign in with browser OAuth instead of using an API key — pick **xAI Grok OAuth (SuperGrok Subscription)** in `hermes model`, or run `hermes auth add xai-oauth`. The same OAuth bearer token is automatically reused by direct-to-xAI tools (TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md) for the full flow — and if Hermes runs on a remote host, also see [OAuth over SSH / Remote Hosts](../guides/oauth-over-ssh.md) for the required `ssh -L` tunnel.
 
 When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically enables prompt caching by sending the `x-grok-conv-id` header with every API request. This routes requests to the same server within a conversation session, allowing xAI's infrastructure to reuse cached system prompts and conversation history.
 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index d529c8af687..77e5d74ad42 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -820,7 +820,7 @@ Available providers for auxiliary tasks: `auto`, `main`, plus any provider in th
 :::
 
 :::tip xAI Grok OAuth
-`xai-oauth` logs in via browser OAuth for SuperGrok subscribers (no API key needed). Run `hermes model` and select **xAI Grok OAuth (SuperGrok Subscription)** to authenticate. The same OAuth token is reused for every direct-to-xAI surface (chat, auxiliary tasks, TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md).
+`xai-oauth` logs in via browser OAuth for SuperGrok subscribers (no API key needed). Run `hermes model` and select **xAI Grok OAuth (SuperGrok Subscription)** to authenticate. The same OAuth token is reused for every direct-to-xAI surface (chat, auxiliary tasks, TTS, image gen, video gen, transcription). See the [xAI Grok OAuth guide](../guides/xai-grok-oauth.md), and if Hermes is on a remote host see [OAuth over SSH / Remote Hosts](../guides/oauth-over-ssh.md).
 :::
 
 :::warning `"main"` is for auxiliary tasks only

From 887ba1fb03d78f8922b32e7d17dfb1e0998d9315 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:47:30 -0700
Subject: [PATCH 083/218] ci: reject PRs with no common ancestor on main
 (#26611)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Catches the failure mode that produced #25045: a contributor PR whose
branch had been disconnected from main's history (likely an accidental
'git checkout --orphan' or '.git/' re-init).  GitHub's merge UI does
not refuse merges of unrelated histories, so the PR landed cleanly
with its intended one-file change but its parent-less root commit
(413990c94) got grafted into main as a second root.  The merge
resolution itself was correct — main's content won for every
conflicting file — but ~1500 files' worth of git blame collapsed
onto that single commit.

Implementation: 'git merge-base origin/main HEAD' exits non-zero and
prints nothing when the two commits share no ancestor.  Check both
conditions and fail with a clear message + recovery steps.

Verified: against the historic state of PR #25045 (base 5d90386ba,
head 1149e75db), 'git merge-base' returns empty with exit 1, so the
new check would have rejected it.
---
 .github/workflows/history-check.yml | 58 +++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 .github/workflows/history-check.yml

diff --git a/.github/workflows/history-check.yml b/.github/workflows/history-check.yml
new file mode 100644
index 00000000000..bd66f19404e
--- /dev/null
+++ b/.github/workflows/history-check.yml
@@ -0,0 +1,58 @@
+name: History Check
+
+# Rejects PRs whose branch has no common ancestor with main.
+#
+# In May 2026 PR #25045 was merged from a branch that had been disconnected
+# from main's history (likely an accidental `git checkout --orphan` or
+# `.git/` re-init).  GitHub's merge UI does not refuse merges of unrelated
+# histories, so the PR landed cleanly with the intended one-file change —
+# but its parent-less root commit (413990c94) got grafted into main as a
+# second root, and ~1500 files' worth of `git blame` history collapsed
+# onto that single commit.
+#
+# This check catches the failure mode by requiring `git merge-base` between
+# the PR head and main to be non-empty.
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  check-common-ancestor:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          fetch-depth: 0  # full history both sides for merge-base
+
+      - name: Reject PRs with no common ancestor on main
+        run: |
+          # `git merge-base` exits non-zero AND prints nothing when the two
+          # commits share no ancestor.  We check both conditions explicitly
+          # so the failure message is clear regardless of which signal fires
+          # first.
+          if ! BASE=$(git merge-base origin/main HEAD 2>/dev/null) || [ -z "$BASE" ]; then
+            echo ""
+            echo "::error::This PR has no common ancestor with main."
+            echo ""
+            echo "Your branch's history is disconnected from main.  Common causes:"
+            echo "  - the branch was created with 'git checkout --orphan'"
+            echo "  - '.git/' was re-initialized at some point during the work"
+            echo "  - the branch was force-pushed from an unrelated repository"
+            echo ""
+            echo "Merging an unrelated-history PR grafts a parent-less root commit"
+            echo "into main and collapses git blame for every file in that snapshot."
+            echo "Reference: PR #25045 caused this and re-rooted blame on ~1500"
+            echo "files to a single orphan commit."
+            echo ""
+            echo "To fix, rebase your changes onto current main:"
+            echo "  git fetch origin main"
+            echo "  git checkout -b fix-branch origin/main"
+            echo "  # re-apply your changes (cherry-pick, copy files, etc.)"
+            echo "  git push -f origin fix-branch"
+            exit 1
+          fi
+          echo "::notice::Common ancestor with main: $BASE"

From 42070ecefb9e9da3adec6d536d130d9dc3b82560 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:58:23 -0700
Subject: [PATCH 084/218] feat(skills/notion): overhaul for Notion Developer
 Platform (May 2026) (#26612)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(skills/notion): overhaul for Notion Developer Platform (May 2026)

Notion shipped its Developer Platform on May 13, 2026: ntn CLI, Workers,
Markdown API, bidirectional webhooks, agent tools. The existing skill only
covered curl + integration token CRUD, so it didn't surface any of the new
ergonomics — particularly the /markdown endpoints (much easier for agents
to consume) and the ntn CLI for headless API + Workers management.

This rewrite (v1.0.0 -> v2.0.0):

- Splits setup into Path A (HTTP, cross-platform incl. Windows), Path B
  (ntn CLI on macOS/Linux, with NOTION_API_TOKEN env var for headless),
  and Path C (Windows fallback — HTTP API or WSL2; native ntn is 'coming
  soon').
- Keeps the full curl reference (still the only Windows-compatible path).
- Adds /markdown endpoints — GET and PATCH page-as-markdown, plus POST
  /v1/pages with a markdown body param. Agent-friendly, no CLI required.
- Adds ntn CLI cheat sheet for raw API shorthand, file uploads, and
  workspace flags.
- Adds Notion Workers section: scaffold, tool/webhook capability shapes,
  lifecycle commands. Gated on Business/Enterprise plans + macOS/Linux.
- Adds Notion-flavored Markdown reference (callouts, toggles, columns,
  mentions, colors) for the /markdown endpoints.
- Adds a 'choose the right path' decision table at the bottom.
- Notes the new efficient Notion MCP server as an optional wiring path.

Auto-generated docs page regenerated via
website/scripts/generate-skill-docs.py.

* docs(skills-catalog): update notion description for v2.0.0
---
 skills/productivity/notion/SKILL.md           | 356 ++++++++++++++++--
 website/docs/reference/skills-catalog.md      |   2 +-
 .../productivity/productivity-notion.md       | 354 +++++++++++++++--
 3 files changed, 632 insertions(+), 80 deletions(-)

diff --git a/skills/productivity/notion/SKILL.md b/skills/productivity/notion/SKILL.md
index b645c088f28..83222ffd938 100644
--- a/skills/productivity/notion/SKILL.md
+++ b/skills/productivity/notion/SKILL.md
@@ -1,35 +1,158 @@
 ---
 name: notion
-description: "Notion API via curl: pages, databases, blocks, search."
-version: 1.0.0
+description: "Notion API + ntn CLI: pages, databases, markdown, Workers."
+version: 2.0.0
 author: community
 license: MIT
 platforms: [linux, macos, windows]
-metadata:
-  hermes:
-    tags: [Notion, Productivity, Notes, Database, API]
-    homepage: https://developers.notion.com
 prerequisites:
   env_vars: [NOTION_API_KEY]
+metadata:
+  hermes:
+    tags: [Notion, Productivity, Notes, Database, API, CLI, Workers]
+    homepage: https://developers.notion.com
 ---
 
-# Notion API
+# Notion
 
-Use the Notion API via curl to create, read, update pages, databases (data sources), and blocks. No extra tools needed — just curl and a Notion API key.
+Talk to Notion two ways. Same integration token works for both — pick by what's available.
 
-## Prerequisites
+◆ **`ntn` CLI** — Notion's official CLI. Shorter syntax, one-line file uploads, required for Workers. macOS + Linux only as of May 2026 (Windows support "coming soon"). **Default when installed.**
+◆ **HTTP + curl** — works everywhere including Windows. **Default fallback** when `ntn` isn't installed.
+
+## Setup
+
+### 1. Get an integration token (required for both paths)
 
 1. Create an integration at https://notion.so/my-integrations
 2. Copy the API key (starts with `ntn_` or `secret_`)
-3. Store it in `~/.hermes/.env`:
+3. Store in `~/.hermes/.env`:
    ```
    NOTION_API_KEY=ntn_your_key_here
    ```
-4. **Important:** Share target pages/databases with your integration in Notion (click "..." → "Connect to" → your integration name)
+4. **Share target pages/databases with the integration** in Notion: page menu `...` → `Connect to` → your integration name. Without this, the API returns 404 for that page even though it exists.
+
+### 2. Install `ntn` (preferred path on macOS / Linux)
+
+```bash
+# Recommended
+curl -fsSL https://ntn.dev | bash
+
+# Or via npm (needs Node 22+, npm 10+)
+npm install --global ntn
+
+ntn --version    # verify
+```
+
+**Skip `ntn login` — use the integration token instead.** This works headlessly, no browser needed:
+```bash
+export NOTION_API_TOKEN=$NOTION_API_KEY      # ntn reads NOTION_API_TOKEN
+export NOTION_KEYRING=0                       # don't try to use the OS keychain
+```
+
+Add those exports to your shell profile (or to `~/.hermes/.env`) so every session inherits them.
+
+### 3. Choose path at runtime
+
+```bash
+if command -v ntn >/dev/null 2>&1; then
+  # use ntn
+else
+  # fall back to curl
+fi
+```
+
+Windows users: skip step 2 entirely until native `ntn` ships — Path B works fine. If you want CLI ergonomics now, install `ntn` inside WSL2.
 
 ## API Basics
 
-All requests use this pattern:
+`Notion-Version: 2025-09-03` is required on all HTTP requests. `ntn` handles this for you. In this version, what users call "databases" are called **data sources** in the API.
+
+## Path A — `ntn` CLI (preferred, macOS / Linux)
+
+### Raw API calls (shorthand for curl)
+```bash
+ntn api v1/users                                  # GET
+ntn api v1/pages parent[page_id]=abc123 \         # POST with inline body
+  properties[title][0][text][content]="Notes"
+ntn api v1/pages/abc123 -X PATCH archived:=true   # PATCH; := is non-string (bool/num/null)
+```
+
+Syntax notes:
+- `key=value` — string fields
+- `key[nested]=value` — nested object fields
+- `key:=value` — typed assignment (booleans, numbers, null, arrays)
+
+### Search
+```bash
+ntn api v1/search query="page title"
+```
+
+### Read page metadata
+```bash
+ntn api v1/pages/{page_id}
+```
+
+### Read page as Markdown (agent-friendly)
+```bash
+ntn api v1/pages/{page_id}/markdown
+```
+
+### Read page content as blocks
+```bash
+ntn api v1/blocks/{page_id}/children
+```
+
+### Create page from Markdown
+```bash
+ntn api v1/pages \
+  parent[page_id]=xxx \
+  properties[title][0][text][content]="Notes from meeting" \
+  markdown="# Agenda
+
+- Q3 roadmap
+- Hiring"
+```
+
+### Patch a page with Markdown
+```bash
+ntn api v1/pages/{page_id}/markdown -X PATCH \
+  markdown="## Update
+
+Shipped the prototype."
+```
+
+### Query a database (data source)
+```bash
+ntn api v1/data_sources/{data_source_id}/query -X POST \
+  filter[property]=Status filter[select][equals]=Active
+```
+
+For complex queries with `sorts`, multiple filter clauses, or compound logic, pipe JSON in:
+```bash
+echo '{"filter": {"property": "Status", "select": {"equals": "Active"}}, "sorts": [{"property": "Date", "direction": "descending"}]}' | \
+  ntn api v1/data_sources/{data_source_id}/query -X POST --json -
+```
+
+### File uploads (one-liner — biggest CLI win)
+```bash
+ntn files create < photo.png
+ntn files create --external-url https://example.com/photo.png
+ntn files list
+```
+
+Compare to the 3-step HTTP flow (create upload → PUT bytes → reference).
+
+### Useful env vars
+| Var | Effect |
+|---|---|
+| `NOTION_API_TOKEN` | Auth token (overrides keychain) — set this to your integration token |
+| `NOTION_KEYRING=0` | File-based creds at `~/.config/notion/auth.json` instead of OS keychain |
+| `NOTION_WORKSPACE_ID` | Skip the workspace picker prompt |
+
+## Path B — HTTP + curl (cross-platform, default on Windows)
+
+All requests share this pattern:
 
 ```bash
 curl -s -X GET "https://api.notion.com/v1/..." \
@@ -38,12 +161,9 @@ curl -s -X GET "https://api.notion.com/v1/..." \
   -H "Content-Type: application/json"
 ```
 
-The `Notion-Version` header is required. This skill uses `2025-09-03` (latest). In this version, databases are called "data sources" in the API.
-
-## Common Operations
+On Windows the `curl` shipped with Windows 10+ works as-is. PowerShell users can also use `Invoke-RestMethod`.
 
 ### Search
-
 ```bash
 curl -s -X POST "https://api.notion.com/v1/search" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -52,24 +172,56 @@ curl -s -X POST "https://api.notion.com/v1/search" \
   -d '{"query": "page title"}'
 ```
 
-### Get Page
-
+### Read page metadata
 ```bash
 curl -s "https://api.notion.com/v1/pages/{page_id}" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
   -H "Notion-Version: 2025-09-03"
 ```
 
-### Get Page Content (blocks)
+### Read page as Markdown (agent-friendly)
 
+Easier to feed to a model than block JSON.
+
+```bash
+curl -s "https://api.notion.com/v1/pages/{page_id}/markdown" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03"
+```
+
+### Read page content as blocks (when you need structure)
 ```bash
 curl -s "https://api.notion.com/v1/blocks/{page_id}/children" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
   -H "Notion-Version: 2025-09-03"
 ```
 
-### Create Page in a Database
+### Create page from Markdown
 
+`POST /v1/pages` accepts a `markdown` body param.
+
+```bash
+curl -s -X POST "https://api.notion.com/v1/pages" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "parent": {"page_id": "xxx"},
+    "properties": {"title": [{"text": {"content": "Notes from meeting"}}]},
+    "markdown": "# Agenda\n\n- Q3 roadmap\n- Hiring\n\n## Decisions\n- Ship MVP Friday"
+  }'
+```
+
+### Patch a page with Markdown
+```bash
+curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}/markdown" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03" \
+  -H "Content-Type: application/json" \
+  -d '{"markdown": "## Update\n\nShipped the prototype."}'
+```
+
+### Create page in a database (typed properties)
 ```bash
 curl -s -X POST "https://api.notion.com/v1/pages" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -84,8 +236,7 @@ curl -s -X POST "https://api.notion.com/v1/pages" \
   }'
 ```
 
-### Query a Database
-
+### Query a database (data source)
 ```bash
 curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -97,8 +248,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query"
   }'
 ```
 
-### Create a Database
-
+### Create a database
 ```bash
 curl -s -X POST "https://api.notion.com/v1/data_sources" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -115,8 +265,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources" \
   }'
 ```
 
-### Update Page Properties
-
+### Update page properties
 ```bash
 curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -125,8 +274,7 @@ curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \
   -d '{"properties": {"Status": {"select": {"name": "Done"}}}}'
 ```
 
-### Add Content to a Page
-
+### Append blocks to a page
 ```bash
 curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -139,6 +287,21 @@ curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \
   }'
 ```
 
+### File uploads (3-step flow)
+```bash
+# 1. Create upload
+curl -s -X POST "https://api.notion.com/v1/file_uploads" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03" \
+  -H "Content-Type: application/json" \
+  -d '{"filename": "photo.png", "content_type": "image/png"}'
+
+# 2. PUT bytes to the upload_url returned above
+curl -s -X PUT "{upload_url}" --data-binary @photo.png
+
+# 3. Reference {file_upload_id} in a page/block payload
+```
+
 ## Property Types
 
 Common property formats for database items:
@@ -154,19 +317,132 @@ Common property formats for database items:
 - **Email:** `{"email": "user@example.com"}`
 - **Relation:** `{"relation": [{"id": "page_id"}]}`
 
-## Key Differences in API Version 2025-09-03
+## API Version 2025-09-03 — Databases vs Data Sources
 
-- **Databases → Data Sources:** Use `/data_sources/` endpoints for queries and retrieval
-- **Two IDs:** Each database has both a `database_id` and a `data_source_id`
-  - Use `database_id` when creating pages (`parent: {"database_id": "..."}`)
-  - Use `data_source_id` when querying (`POST /v1/data_sources/{id}/query`)
-- **Search results:** Databases return as `"object": "data_source"` with their `data_source_id`
+- **Databases became data sources.** Use `/data_sources/` endpoints for queries and retrieval.
+- **Two IDs per database:** `database_id` and `data_source_id`.
+  - `database_id` when creating pages: `parent: {"database_id": "..."}`
+  - `data_source_id` when querying: `POST /v1/data_sources/{id}/query`
+- Search returns databases as `"object": "data_source"` with the `data_source_id` field.
+
+## Notion Workers (advanced, requires `ntn`)
+
+Workers are TypeScript programs Notion hosts for you. One worker can expose any combination of:
+- **Syncs** — pull data from external APIs into a Notion database on a schedule (default 30 min).
+- **Tools** — appear as callable tools inside Notion's Custom Agents.
+- **Webhooks** — receive HTTP events from external services (GitHub, Stripe, etc.) and act in Notion.
+
+**Plan / platform gating:**
+- CLI works on all plans. **Deploying Workers requires Business or Enterprise.**
+- `ntn` is macOS/Linux only as of May 2026. Windows users need WSL2 or to wait for native support.
+- Free through August 11, 2026; metered on Notion credits after.
+
+### Minimal Worker
+
+```bash
+ntn workers new my-worker      # scaffold
+cd my-worker
+# Edit src/index.ts
+ntn workers deploy --name my-worker
+```
+
+`src/index.ts`:
+```typescript
+import { Worker } from "@notionhq/workers";
+
+const worker = new Worker();
+export default worker;
+
+worker.tool("greet", {
+  title: "Greet a User",
+  description: "Returns a friendly greeting",
+  inputSchema: { type: "object", properties: { name: { type: "string" } }, required: ["name"] },
+  execute: async ({ name }) => `Hello, ${name}!`,
+});
+```
+
+### Webhook capability
+
+```typescript
+worker.webhook("onGithubPush", {
+  title: "GitHub Push Handler",
+  execute: async (events, { notion }) => {
+    for (const event of events) {
+      // event.body, event.rawBody (for signature verification), event.headers
+      console.log("got delivery", event.deliveryId);
+    }
+  },
+});
+```
+
+After deploy: `ntn workers webhooks list` shows the URL Notion generates. Treat that URL as a secret — anyone with it can POST events unless you add signature verification.
+
+### Worker lifecycle commands
+
+```bash
+ntn workers deploy
+ntn workers list
+ntn workers exec <capability-key> -d '{"name": "world"}'
+ntn workers sync trigger <key>            # run a sync now
+ntn workers sync pause <key>
+ntn workers env set GITHUB_WEBHOOK_SECRET=...
+ntn workers runs list                     # recent invocations
+ntn workers runs logs <run-id>
+ntn workers webhooks list
+```
+
+When asked to build a Worker, scaffold with `ntn workers new`, write the code in `src/index.ts`, set any secrets with `ntn workers env set`, and deploy. Notion's docs at https://developers.notion.com/workers cover the full API surface.
+
+## Notion-Flavored Markdown (used by `/markdown` endpoints)
+
+Standard CommonMark plus XML-like tags for Notion-specific blocks. Use **tabs** for indentation.
+
+**Blocks beyond CommonMark:**
+```
+<callout icon="🎯" color="blue_bg">
+	Ship the MVP by **Friday**.
+</callout>
+
+<details color="gray">
+<summary>Toggle title</summary>
+	Children indented one tab
+</details>
+
+<columns>
+	<column>Left side</column>
+	<column>Right side</column>
+</columns>
+
+<table_of_contents color="gray"/>
+```
+
+**Inline:**
+- Mentions: `<mention-user url="..."/>`, `<mention-page url="...">Title</mention-page>`, `<mention-date start="2026-05-15"/>`
+- Underline: `<span underline="true">text</span>`
+- Color: `<span color="blue">text</span>` or block-level `{color="blue"}` on the first line
+- Math: inline `$x^2$`, block `$$ ... $$`
+- Citations: `[^https://example.com]`
+
+**Colors:** `gray brown orange yellow green blue purple pink red`, plus `*_bg` variants for backgrounds.
+
+Headings 5/6 collapse to H4. Multiple `>` lines render as separate quote blocks — use `<br>` inside a single `>` for multi-line quotes.
+
+## Choosing the Right Path
+
+| Task | mac / Linux | Windows |
+|---|---|---|
+| Read/write pages, search, query databases | `ntn api ...` | curl |
+| Read a page for an agent to summarize | `ntn api v1/pages/{id}/markdown` | curl `/markdown` endpoint |
+| Upload a file | `ntn files create < file` | 3-step HTTP flow |
+| One-off API exploration | `ntn api ...` | curl |
+| Build a sync / webhook / agent tool hosted by Notion | `ntn workers ...` | WSL2 + `ntn workers ...` |
 
 ## Notes
 
-- Page/database IDs are UUIDs (with or without dashes)
-- Rate limit: ~3 requests/second average
-- The API cannot set database view filters — that's UI-only
-- Use `is_inline: true` when creating data sources to embed them in pages
-- Add `-s` flag to curl to suppress progress bars (cleaner output for Hermes)
-- Pipe output through `jq` for readable JSON: `... | jq '.results[0].properties'`
+- Page/database IDs are UUIDs (with or without dashes — both accepted).
+- Rate limit: ~3 requests/second average. The CLI doesn't bypass this.
+- The API cannot set database **view** filters — that's UI-only.
+- Use `"is_inline": true` when creating data sources to embed them in a page.
+- Always pass `-s` to curl to suppress progress bars (cleaner agent output).
+- Pipe JSON through `jq` when reading: `... | jq '.results[0].properties'`.
+- Notion also ships an MCP server now (`Notion MCP`, ~91% more token-efficient on DB ops than the previous version) — wire it via Hermes' MCP support if you want streaming Notion access from inside a session, but the paths above are enough for most one-shot tasks.
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index 8adeb3dcf76..c5b205f521d 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -144,7 +144,7 @@ If a skill is missing from this list but present in the repo, the catalog is reg
 | [`linear`](/docs/user-guide/skills/bundled/productivity/productivity-linear) | Linear: manage issues, projects, teams via GraphQL + curl. | `productivity/linear` |
 | [`maps`](/docs/user-guide/skills/bundled/productivity/productivity-maps) | Geocode, POIs, routes, timezones via OpenStreetMap/OSRM. | `productivity/maps` |
 | [`nano-pdf`](/docs/user-guide/skills/bundled/productivity/productivity-nano-pdf) | Edit PDF text/typos/titles via nano-pdf CLI (NL prompts). | `productivity/nano-pdf` |
-| [`notion`](/docs/user-guide/skills/bundled/productivity/productivity-notion) | Notion API via curl: pages, databases, blocks, search. | `productivity/notion` |
+| [`notion`](/docs/user-guide/skills/bundled/productivity/productivity-notion) | Notion API + ntn CLI: pages, databases, markdown, Workers. | `productivity/notion` |
 | [`ocr-and-documents`](/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents) | Extract text from PDFs/scans (pymupdf, marker-pdf). | `productivity/ocr-and-documents` |
 | [`powerpoint`](/docs/user-guide/skills/bundled/productivity/productivity-powerpoint) | Create, read, edit .pptx decks, slides, notes, templates. | `productivity/powerpoint` |
 | [`teams-meeting-pipeline`](/docs/user-guide/skills/bundled/productivity/productivity-teams-meeting-pipeline) | Operate the Teams meeting summary pipeline via Hermes CLI — summarize meetings, inspect pipeline status, replay jobs, manage Microsoft Graph subscriptions. | `productivity/teams-meeting-pipeline` |
diff --git a/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md b/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md
index 7e8fab2f2ba..80487d6b88f 100644
--- a/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md
+++ b/website/docs/user-guide/skills/bundled/productivity/productivity-notion.md
@@ -1,14 +1,14 @@
 ---
-title: "Notion — Notion API via curl: pages, databases, blocks, search"
+title: "Notion — Notion API + ntn CLI: pages, databases, markdown, Workers"
 sidebar_label: "Notion"
-description: "Notion API via curl: pages, databases, blocks, search"
+description: "Notion API + ntn CLI: pages, databases, markdown, Workers"
 ---
 
 {/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
 
 # Notion
 
-Notion API via curl: pages, databases, blocks, search.
+Notion API + ntn CLI: pages, databases, markdown, Workers.
 
 ## Skill metadata
 
@@ -16,11 +16,11 @@ Notion API via curl: pages, databases, blocks, search.
 |---|---|
 | Source | Bundled (installed by default) |
 | Path | `skills/productivity/notion` |
-| Version | `1.0.0` |
+| Version | `2.0.0` |
 | Author | community |
 | License | MIT |
 | Platforms | linux, macos, windows |
-| Tags | `Notion`, `Productivity`, `Notes`, `Database`, `API` |
+| Tags | `Notion`, `Productivity`, `Notes`, `Database`, `API`, `CLI`, `Workers` |
 
 ## Reference: full SKILL.md
 
@@ -28,23 +28,146 @@ Notion API via curl: pages, databases, blocks, search.
 The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
 :::
 
-# Notion API
+# Notion
 
-Use the Notion API via curl to create, read, update pages, databases (data sources), and blocks. No extra tools needed — just curl and a Notion API key.
+Talk to Notion two ways. Same integration token works for both — pick by what's available.
 
-## Prerequisites
+◆ **`ntn` CLI** — Notion's official CLI. Shorter syntax, one-line file uploads, required for Workers. macOS + Linux only as of May 2026 (Windows support "coming soon"). **Default when installed.**
+◆ **HTTP + curl** — works everywhere including Windows. **Default fallback** when `ntn` isn't installed.
+
+## Setup
+
+### 1. Get an integration token (required for both paths)
 
 1. Create an integration at https://notion.so/my-integrations
 2. Copy the API key (starts with `ntn_` or `secret_`)
-3. Store it in `~/.hermes/.env`:
+3. Store in `~/.hermes/.env`:
    ```
    NOTION_API_KEY=ntn_your_key_here
    ```
-4. **Important:** Share target pages/databases with your integration in Notion (click "..." → "Connect to" → your integration name)
+4. **Share target pages/databases with the integration** in Notion: page menu `...` → `Connect to` → your integration name. Without this, the API returns 404 for that page even though it exists.
+
+### 2. Install `ntn` (preferred path on macOS / Linux)
+
+```bash
+# Recommended
+curl -fsSL https://ntn.dev | bash
+
+# Or via npm (needs Node 22+, npm 10+)
+npm install --global ntn
+
+ntn --version    # verify
+```
+
+**Skip `ntn login` — use the integration token instead.** This works headlessly, no browser needed:
+```bash
+export NOTION_API_TOKEN=$NOTION_API_KEY      # ntn reads NOTION_API_TOKEN
+export NOTION_KEYRING=0                       # don't try to use the OS keychain
+```
+
+Add those exports to your shell profile (or to `~/.hermes/.env`) so every session inherits them.
+
+### 3. Choose path at runtime
+
+```bash
+if command -v ntn >/dev/null 2>&1; then
+  # use ntn
+else
+  # fall back to curl
+fi
+```
+
+Windows users: skip step 2 entirely until native `ntn` ships — Path B works fine. If you want CLI ergonomics now, install `ntn` inside WSL2.
 
 ## API Basics
 
-All requests use this pattern:
+`Notion-Version: 2025-09-03` is required on all HTTP requests. `ntn` handles this for you. In this version, what users call "databases" are called **data sources** in the API.
+
+## Path A — `ntn` CLI (preferred, macOS / Linux)
+
+### Raw API calls (shorthand for curl)
+```bash
+ntn api v1/users                                  # GET
+ntn api v1/pages parent[page_id]=abc123 \         # POST with inline body
+  properties[title][0][text][content]="Notes"
+ntn api v1/pages/abc123 -X PATCH archived:=true   # PATCH; := is non-string (bool/num/null)
+```
+
+Syntax notes:
+- `key=value` — string fields
+- `key[nested]=value` — nested object fields
+- `key:=value` — typed assignment (booleans, numbers, null, arrays)
+
+### Search
+```bash
+ntn api v1/search query="page title"
+```
+
+### Read page metadata
+```bash
+ntn api v1/pages/{page_id}
+```
+
+### Read page as Markdown (agent-friendly)
+```bash
+ntn api v1/pages/{page_id}/markdown
+```
+
+### Read page content as blocks
+```bash
+ntn api v1/blocks/{page_id}/children
+```
+
+### Create page from Markdown
+```bash
+ntn api v1/pages \
+  parent[page_id]=xxx \
+  properties[title][0][text][content]="Notes from meeting" \
+  markdown="# Agenda
+
+- Q3 roadmap
+- Hiring"
+```
+
+### Patch a page with Markdown
+```bash
+ntn api v1/pages/{page_id}/markdown -X PATCH \
+  markdown="## Update
+
+Shipped the prototype."
+```
+
+### Query a database (data source)
+```bash
+ntn api v1/data_sources/{data_source_id}/query -X POST \
+  filter[property]=Status filter[select][equals]=Active
+```
+
+For complex queries with `sorts`, multiple filter clauses, or compound logic, pipe JSON in:
+```bash
+echo '{"filter": {"property": "Status", "select": {"equals": "Active"}}, "sorts": [{"property": "Date", "direction": "descending"}]}' | \
+  ntn api v1/data_sources/{data_source_id}/query -X POST --json -
+```
+
+### File uploads (one-liner — biggest CLI win)
+```bash
+ntn files create < photo.png
+ntn files create --external-url https://example.com/photo.png
+ntn files list
+```
+
+Compare to the 3-step HTTP flow (create upload → PUT bytes → reference).
+
+### Useful env vars
+| Var | Effect |
+|---|---|
+| `NOTION_API_TOKEN` | Auth token (overrides keychain) — set this to your integration token |
+| `NOTION_KEYRING=0` | File-based creds at `~/.config/notion/auth.json` instead of OS keychain |
+| `NOTION_WORKSPACE_ID` | Skip the workspace picker prompt |
+
+## Path B — HTTP + curl (cross-platform, default on Windows)
+
+All requests share this pattern:
 
 ```bash
 curl -s -X GET "https://api.notion.com/v1/..." \
@@ -53,12 +176,9 @@ curl -s -X GET "https://api.notion.com/v1/..." \
   -H "Content-Type: application/json"
 ```
 
-The `Notion-Version` header is required. This skill uses `2025-09-03` (latest). In this version, databases are called "data sources" in the API.
-
-## Common Operations
+On Windows the `curl` shipped with Windows 10+ works as-is. PowerShell users can also use `Invoke-RestMethod`.
 
 ### Search
-
 ```bash
 curl -s -X POST "https://api.notion.com/v1/search" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -67,24 +187,56 @@ curl -s -X POST "https://api.notion.com/v1/search" \
   -d '{"query": "page title"}'
 ```
 
-### Get Page
-
+### Read page metadata
 ```bash
 curl -s "https://api.notion.com/v1/pages/{page_id}" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
   -H "Notion-Version: 2025-09-03"
 ```
 
-### Get Page Content (blocks)
+### Read page as Markdown (agent-friendly)
 
+Easier to feed to a model than block JSON.
+
+```bash
+curl -s "https://api.notion.com/v1/pages/{page_id}/markdown" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03"
+```
+
+### Read page content as blocks (when you need structure)
 ```bash
 curl -s "https://api.notion.com/v1/blocks/{page_id}/children" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
   -H "Notion-Version: 2025-09-03"
 ```
 
-### Create Page in a Database
+### Create page from Markdown
 
+`POST /v1/pages` accepts a `markdown` body param.
+
+```bash
+curl -s -X POST "https://api.notion.com/v1/pages" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "parent": {"page_id": "xxx"},
+    "properties": {"title": [{"text": {"content": "Notes from meeting"}}]},
+    "markdown": "# Agenda\n\n- Q3 roadmap\n- Hiring\n\n## Decisions\n- Ship MVP Friday"
+  }'
+```
+
+### Patch a page with Markdown
+```bash
+curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}/markdown" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03" \
+  -H "Content-Type: application/json" \
+  -d '{"markdown": "## Update\n\nShipped the prototype."}'
+```
+
+### Create page in a database (typed properties)
 ```bash
 curl -s -X POST "https://api.notion.com/v1/pages" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -99,8 +251,7 @@ curl -s -X POST "https://api.notion.com/v1/pages" \
   }'
 ```
 
-### Query a Database
-
+### Query a database (data source)
 ```bash
 curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -112,8 +263,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources/{data_source_id}/query"
   }'
 ```
 
-### Create a Database
-
+### Create a database
 ```bash
 curl -s -X POST "https://api.notion.com/v1/data_sources" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -130,8 +280,7 @@ curl -s -X POST "https://api.notion.com/v1/data_sources" \
   }'
 ```
 
-### Update Page Properties
-
+### Update page properties
 ```bash
 curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -140,8 +289,7 @@ curl -s -X PATCH "https://api.notion.com/v1/pages/{page_id}" \
   -d '{"properties": {"Status": {"select": {"name": "Done"}}}}'
 ```
 
-### Add Content to a Page
-
+### Append blocks to a page
 ```bash
 curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \
   -H "Authorization: Bearer $NOTION_API_KEY" \
@@ -154,6 +302,21 @@ curl -s -X PATCH "https://api.notion.com/v1/blocks/{page_id}/children" \
   }'
 ```
 
+### File uploads (3-step flow)
+```bash
+# 1. Create upload
+curl -s -X POST "https://api.notion.com/v1/file_uploads" \
+  -H "Authorization: Bearer $NOTION_API_KEY" \
+  -H "Notion-Version: 2025-09-03" \
+  -H "Content-Type: application/json" \
+  -d '{"filename": "photo.png", "content_type": "image/png"}'
+
+# 2. PUT bytes to the upload_url returned above
+curl -s -X PUT "{upload_url}" --data-binary @photo.png
+
+# 3. Reference {file_upload_id} in a page/block payload
+```
+
 ## Property Types
 
 Common property formats for database items:
@@ -169,19 +332,132 @@ Common property formats for database items:
 - **Email:** `{"email": "user@example.com"}`
 - **Relation:** `{"relation": [{"id": "page_id"}]}`
 
-## Key Differences in API Version 2025-09-03
+## API Version 2025-09-03 — Databases vs Data Sources
 
-- **Databases → Data Sources:** Use `/data_sources/` endpoints for queries and retrieval
-- **Two IDs:** Each database has both a `database_id` and a `data_source_id`
-  - Use `database_id` when creating pages (`parent: {"database_id": "..."}`)
-  - Use `data_source_id` when querying (`POST /v1/data_sources/{id}/query`)
-- **Search results:** Databases return as `"object": "data_source"` with their `data_source_id`
+- **Databases became data sources.** Use `/data_sources/` endpoints for queries and retrieval.
+- **Two IDs per database:** `database_id` and `data_source_id`.
+  - `database_id` when creating pages: `parent: {"database_id": "..."}`
+  - `data_source_id` when querying: `POST /v1/data_sources/{id}/query`
+- Search returns databases as `"object": "data_source"` with the `data_source_id` field.
+
+## Notion Workers (advanced, requires `ntn`)
+
+Workers are TypeScript programs Notion hosts for you. One worker can expose any combination of:
+- **Syncs** — pull data from external APIs into a Notion database on a schedule (default 30 min).
+- **Tools** — appear as callable tools inside Notion's Custom Agents.
+- **Webhooks** — receive HTTP events from external services (GitHub, Stripe, etc.) and act in Notion.
+
+**Plan / platform gating:**
+- CLI works on all plans. **Deploying Workers requires Business or Enterprise.**
+- `ntn` is macOS/Linux only as of May 2026. Windows users need WSL2 or to wait for native support.
+- Free through August 11, 2026; metered on Notion credits after.
+
+### Minimal Worker
+
+```bash
+ntn workers new my-worker      # scaffold
+cd my-worker
+# Edit src/index.ts
+ntn workers deploy --name my-worker
+```
+
+`src/index.ts`:
+```typescript
+import { Worker } from "@notionhq/workers";
+
+const worker = new Worker();
+export default worker;
+
+worker.tool("greet", {
+  title: "Greet a User",
+  description: "Returns a friendly greeting",
+  inputSchema: { type: "object", properties: { name: { type: "string" } }, required: ["name"] },
+  execute: async ({ name }) => `Hello, ${name}!`,
+});
+```
+
+### Webhook capability
+
+```typescript
+worker.webhook("onGithubPush", {
+  title: "GitHub Push Handler",
+  execute: async (events, { notion }) => {
+    for (const event of events) {
+      // event.body, event.rawBody (for signature verification), event.headers
+      console.log("got delivery", event.deliveryId);
+    }
+  },
+});
+```
+
+After deploy: `ntn workers webhooks list` shows the URL Notion generates. Treat that URL as a secret — anyone with it can POST events unless you add signature verification.
+
+### Worker lifecycle commands
+
+```bash
+ntn workers deploy
+ntn workers list
+ntn workers exec <capability-key> -d '{"name": "world"}'
+ntn workers sync trigger <key>            # run a sync now
+ntn workers sync pause <key>
+ntn workers env set GITHUB_WEBHOOK_SECRET=...
+ntn workers runs list                     # recent invocations
+ntn workers runs logs <run-id>
+ntn workers webhooks list
+```
+
+When asked to build a Worker, scaffold with `ntn workers new`, write the code in `src/index.ts`, set any secrets with `ntn workers env set`, and deploy. Notion's docs at https://developers.notion.com/workers cover the full API surface.
+
+## Notion-Flavored Markdown (used by `/markdown` endpoints)
+
+Standard CommonMark plus XML-like tags for Notion-specific blocks. Use **tabs** for indentation.
+
+**Blocks beyond CommonMark:**
+```
+<callout icon="🎯" color="blue_bg">
+	Ship the MVP by **Friday**.
+</callout>
+
+<details color="gray">
+<summary>Toggle title</summary>
+	Children indented one tab
+</details>
+
+<columns>
+	<column>Left side</column>
+	<column>Right side</column>
+</columns>
+
+<table_of_contents color="gray"/>
+```
+
+**Inline:**
+- Mentions: `<mention-user url="..."/>`, `<mention-page url="...">Title</mention-page>`, `<mention-date start="2026-05-15"/>`
+- Underline: `<span underline="true">text</span>`
+- Color: `<span color="blue">text</span>` or block-level `{color="blue"}` on the first line
+- Math: inline `$x^2$`, block `$$ ... $$`
+- Citations: `[^https://example.com]`
+
+**Colors:** `gray brown orange yellow green blue purple pink red`, plus `*_bg` variants for backgrounds.
+
+Headings 5/6 collapse to H4. Multiple `>` lines render as separate quote blocks — use `<br>` inside a single `>` for multi-line quotes.
+
+## Choosing the Right Path
+
+| Task | mac / Linux | Windows |
+|---|---|---|
+| Read/write pages, search, query databases | `ntn api ...` | curl |
+| Read a page for an agent to summarize | `ntn api v1/pages/{id}/markdown` | curl `/markdown` endpoint |
+| Upload a file | `ntn files create < file` | 3-step HTTP flow |
+| One-off API exploration | `ntn api ...` | curl |
+| Build a sync / webhook / agent tool hosted by Notion | `ntn workers ...` | WSL2 + `ntn workers ...` |
 
 ## Notes
 
-- Page/database IDs are UUIDs (with or without dashes)
-- Rate limit: ~3 requests/second average
-- The API cannot set database view filters — that's UI-only
-- Use `is_inline: true` when creating data sources to embed them in pages
-- Add `-s` flag to curl to suppress progress bars (cleaner output for Hermes)
-- Pipe output through `jq` for readable JSON: `... | jq '.results[0].properties'`
+- Page/database IDs are UUIDs (with or without dashes — both accepted).
+- Rate limit: ~3 requests/second average. The CLI doesn't bypass this.
+- The API cannot set database **view** filters — that's UI-only.
+- Use `"is_inline": true` when creating data sources to embed them in a page.
+- Always pass `-s` to curl to suppress progress bars (cleaner agent output).
+- Pipe JSON through `jq` when reading: `... | jq '.results[0].properties'`.
+- Notion also ships an MCP server now (`Notion MCP`, ~91% more token-efficient on DB ops than the previous version) — wire it via Hermes' MCP support if you want streaming Notion access from inside a session, but the paths above are enough for most one-shot tasks.

From 2d7182f72c398496db60de5c18f8554d7ecc6d82 Mon Sep 17 00:00:00 2001
From: sprmn24 <oncuevtv@gmail.com>
Date: Fri, 15 May 2026 18:53:52 +0300
Subject: [PATCH 085/218] fix(delegate): move heartbeat thread start inside try
 block to prevent orphan
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_heartbeat_thread.start() was called before the try/finally block that
contains _heartbeat_stop.set(). If _register_subagent() or any code
between .start() and try: raised an exception, the finally block would
never run — leaving the heartbeat thread as an orphan that continues
calling _touch_activity() on the parent agent, incorrectly resetting
gateway timeout counters.

Move _heartbeat_thread.start() to be the first statement inside the
try block so the finally block always reaches _heartbeat_stop.set()
regardless of how the child run completes or fails.

Root cause: heartbeat start outside try/finally scope
Impact: orphan heartbeat thread incorrectly resets parent gateway timeouts
---
 tools/delegate_tool.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index f4da5127a18..2cdce9cae64 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -1431,7 +1431,6 @@ def _run_single_child(
                 pass
 
     _heartbeat_thread = threading.Thread(target=_heartbeat_loop, daemon=True)
-    _heartbeat_thread.start()
 
     # Register the live agent in the module-level registry so the TUI can
     # target it by subagent_id (kill, pause, status queries).  Unregistered
@@ -1462,6 +1461,7 @@ def _run_single_child(
         )
 
     try:
+        _heartbeat_thread.start()
         if child_progress_cb:
             try:
                 child_progress_cb("subagent.start", preview=goal)

From 6068363311b861ad0bb411bfffe5958bf8b6d142 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 15:01:09 -0700
Subject: [PATCH 086/218] fix(delegate): guard heartbeat join against unstarted
 thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pairs with the prior commit (start() now inside the try block).  If
threading.Thread.start() itself raises (OS thread exhaustion under
heavy delegation fanout), the finally would call .join() on a
never-started thread, which raises RuntimeError("cannot join thread
before it is started") — trading one rare bug for another.

Thread.ident is None until start() succeeds, so gate the join on it.
---
 tools/delegate_tool.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index 2cdce9cae64..f3a037c4341 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -1836,9 +1836,13 @@ def _run_single_child(
 
     finally:
         # Stop the heartbeat thread so it doesn't keep touching parent activity
-        # after the child has finished (or failed).
+        # after the child has finished (or failed).  Guard the join: .start()
+        # now lives inside the try block, so if it raised (OS thread
+        # exhaustion) the thread was never started and Thread.join() would
+        # raise RuntimeError.  ident is None until start() succeeds.
         _heartbeat_stop.set()
-        _heartbeat_thread.join(timeout=5)
+        if _heartbeat_thread.ident is not None:
+            _heartbeat_thread.join(timeout=5)
 
         # Drop the TUI-facing registry entry.  Safe to call even if the
         # child was never registered (e.g. ID missing on test doubles).

From 7fee1f61eb52d1706af04c9606ee1a2e7ef3afc3 Mon Sep 17 00:00:00 2001
From: sprmn24 <oncuevtv@gmail.com>
Date: Fri, 15 May 2026 18:28:45 +0300
Subject: [PATCH 087/218] fix(memory): eliminate TOCTOU race in Windows file
 lock creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Windows (msvcrt path), _file_lock() first checked if the lock file
existed and wrote it with write_text(), then opened it with open('r+').
Between these two calls, another process could delete the file causing
open('r+') to raise FileNotFoundError — uncaught, leaving memory writes
to proceed without holding the lock, risking data corruption.

Replace the three-line sequence with a single open('a+', ...) call which
atomically creates the file if missing or opens it if it exists, closing
the TOCTOU window entirely. The existing fd.seek(0) before msvcrt.locking()
is preserved and sufficient for correct lock byte positioning.

Root cause: TOCTOU between lock_path.write_text() and open('r+')
Impact: concurrent memory writes on Windows could corrupt MEMORY.md
---
 tools/memory_tool.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tools/memory_tool.py b/tools/memory_tool.py
index 236760a464a..42737f66c4f 100644
--- a/tools/memory_tool.py
+++ b/tools/memory_tool.py
@@ -156,10 +156,7 @@ class MemoryStore:
             yield
             return
 
-        if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0):
-            lock_path.write_text(" ", encoding="utf-8")
-
-        fd = open(lock_path, "r+" if msvcrt else "a+", encoding="utf-8")
+        fd = open(lock_path, "a+", encoding="utf-8")
         try:
             if fcntl:
                 fcntl.flock(fd, fcntl.LOCK_EX)

From 4aec25bc4411edb4563292cadbd02c365c846286 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 14:58:28 -0700
Subject: [PATCH 088/218] fix(windows): stop spamming cwd-missing +
 tirith-spawn warnings on every terminal call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two log-spam fixes surfaced by a Windows user (Git Bash + Python 3.11.9):

1. LocalEnvironment cwd warn spam
   ============================
   Git Bash's `pwd -P` emits paths like `/c/Users/x`. The base-class
   `_extract_cwd_from_output` was assigning this verbatim to `self.cwd`
   without validation, then `_resolve_safe_cwd`'s `os.path.isdir(/c/...)`
   returned False on Windows, triggering:

       LocalEnvironment cwd '/c/Users/NVIDIA' is missing on disk;
       falling back to '/' so terminal commands keep working.

   ...on every terminal call. The pre-existing Windows-path translation
   inside `_run_bash` ran AFTER the safe-cwd check, so it could never
   prevent the warning.

   Fix:
   - New `_msys_to_windows_path` helper (idempotent, no-op off Windows).
   - `_resolve_safe_cwd` normalizes before `isdir`, so a valid MSYS path
     is recognized as the real directory it points at.
   - `LocalEnvironment._update_cwd` and a new override of
     `_extract_cwd_from_output` translate + validate before mutating
     `self.cwd`. Stale / non-existent marker paths roll back to the
     previous cwd instead of clobbering it.
   - The fallback warning still fires when the directory really is gone
     (deletion-recovery scenario from #17558 still covered).

2. tirith spawn-failed warn spam
   =============================
   When tirith isn't installed (background install in flight, or marked
   failed for the day) and the configured path stays as the bare string
   `tirith`, every `subprocess.run([tirith_path, ...])` raises OSError
   and logged:

       tirith spawn failed: [WinError 2] The system cannot find the file specified

   ...on every command. fail_open=True means behaviour is correct, but
   the log noise is severe.

   Fix:
   - `_warn_once(key, ...)` thread-safe dedupe helper.
   - Three hot-path warnings (`tirith path resolved to None`,
     `tirith spawn failed: ...`, `tirith timed out after Ns`) now log
     once per (exception class, errno) / timeout-value / path-none key.
   - Dedupe set is cleared on `_clear_install_failed` so a successful
     install lets a subsequent failure surface again.

Tests
=====
- `tests/tools/test_local_env_windows_msys.py`: 12 tests covering the
  MSYS→Windows translator, the resolve fast-path, update_cwd validation,
  and extract_cwd_from_output rollback.
- `tests/tools/test_tirith_security.py`: 4 new dedupe tests (15 spawn
  failures → 1 log line; distinct exc types → 2 lines; timeout dedupe;
  path-None dedupe).

Targeted runs:
  test_local_env_windows_msys.py      12 passed
  test_local_env_cwd_recovery.py       7 passed (pre-existing, no regressions)
  test_tirith_security.py             67 passed (63 pre-existing + 4 new)
  test_base_environment + local_*    37 passed (no regressions)
  test_local_env_blocklist + neighbours  114 passed

Reported via Hermes log capture: 19× cwd warnings + 15× tirith warnings
in a single short session.
---
 tests/tools/test_local_env_windows_msys.py | 200 +++++++++++++++++++++
 tests/tools/test_tirith_security.py        | 117 ++++++++++++
 tools/environments/local.py                |  85 +++++++--
 tools/tirith_security.py                   |  53 +++++-
 4 files changed, 441 insertions(+), 14 deletions(-)
 create mode 100644 tests/tools/test_local_env_windows_msys.py

diff --git a/tests/tools/test_local_env_windows_msys.py b/tests/tools/test_local_env_windows_msys.py
new file mode 100644
index 00000000000..6987c965af6
--- /dev/null
+++ b/tests/tools/test_local_env_windows_msys.py
@@ -0,0 +1,200 @@
+"""Tests for the Windows / Git Bash MSYS-path normalization in
+``LocalEnvironment``.
+
+Background
+----------
+On Windows, ``pwd -P`` inside Git Bash emits paths like
+``/c/Users/NVIDIA``. ``subprocess.Popen(..., cwd=...)`` only accepts
+native Windows paths (``C:\\Users\\NVIDIA``), and the validation done
+by ``_resolve_safe_cwd`` was also checking the MSYS form against
+``os.path.isdir``, which returns ``False`` on Windows. The combined
+effect was a warning logged on every single terminal call:
+
+    LocalEnvironment cwd '/c/Users/NVIDIA' is missing on disk;
+    falling back to '/' so terminal commands keep working.
+
+These tests fake the Windows env on Linux CI by patching ``_IS_WINDOWS``
+and ``os.path.isdir`` so the MSYS path tests as "missing" exactly like
+on the real OS.
+"""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from tools.environments import local as local_mod
+from tools.environments.local import (
+    LocalEnvironment,
+    _msys_to_windows_path,
+    _resolve_safe_cwd,
+)
+
+
+# ---------------------------------------------------------------------------
+# _msys_to_windows_path — pure-function unit tests
+# ---------------------------------------------------------------------------
+
+class TestMsysToWindowsPath:
+    def test_noop_on_non_windows(self, monkeypatch):
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", False)
+        # On a non-Windows host the function must never rewrite the path
+        # — POSIX-style paths are real paths there.
+        assert _msys_to_windows_path("/c/Users/NVIDIA") == "/c/Users/NVIDIA"
+        assert _msys_to_windows_path("/home/teknium") == "/home/teknium"
+
+    def test_translates_drive_path(self, monkeypatch):
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+        assert _msys_to_windows_path("/c/Users/NVIDIA") == r"C:\Users\NVIDIA"
+        assert _msys_to_windows_path("/d/Projects/foo bar") == r"D:\Projects\foo bar"
+
+    def test_translates_bare_drive_root(self, monkeypatch):
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+        # Bare "/c" alone should resolve to the drive root.
+        assert _msys_to_windows_path("/c") == "C:\\"
+        # Trailing slash on the drive letter is also a root.
+        assert _msys_to_windows_path("/c/") == "C:\\"
+
+    def test_idempotent_on_already_windows_path(self, monkeypatch):
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+        assert _msys_to_windows_path(r"C:\Users\NVIDIA") == r"C:\Users\NVIDIA"
+
+    def test_does_not_translate_multi_char_first_segment(self, monkeypatch):
+        """``/tmp/foo`` and ``/home/x`` must NOT be misread as drive paths
+        just because they start with ``/`` and a single letter — the regex
+        only matches when the first segment is exactly one character."""
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+        assert _msys_to_windows_path("/tmp/foo") == "/tmp/foo"
+        assert _msys_to_windows_path("/home/x") == "/home/x"
+
+    def test_empty_string(self, monkeypatch):
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+        assert _msys_to_windows_path("") == ""
+
+
+# ---------------------------------------------------------------------------
+# _resolve_safe_cwd — Windows fast path
+# ---------------------------------------------------------------------------
+
+class TestResolveSafeCwdWindows:
+    def test_msys_path_resolves_to_native_when_native_exists(
+        self, monkeypatch, tmp_path,
+    ):
+        """The whole point of this fix: a Git Bash ``/c/Users/x`` value
+        should resolve to its native equivalent if that native dir exists,
+        WITHOUT falling back to the temp dir."""
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+
+        # tmp_path is a real native dir on the test host. Build a fake
+        # MSYS form pointing at it and prove the resolver finds it.
+        native = str(tmp_path)
+        # Construct a synthetic MSYS form for whatever tmp_path is.
+        # On Linux CI tmp_path is /tmp/... ; the resolver shouldn't even
+        # try to translate that (regex won't match), so emulate the
+        # mapping by pointing the translator at the real native dir.
+        with patch.object(
+            local_mod, "_msys_to_windows_path", return_value=native
+        ):
+            assert _resolve_safe_cwd("/c/whatever") == native
+
+
+# ---------------------------------------------------------------------------
+# End-to-end: _update_cwd via marker file (Windows simulation)
+# ---------------------------------------------------------------------------
+
+class TestUpdateCwdWindowsMsys:
+    def test_marker_file_msys_path_stored_in_native_form(
+        self, monkeypatch, tmp_path,
+    ):
+        """When Git Bash writes ``/c/Users/x`` to the cwd marker file on
+        Windows, ``_update_cwd`` must translate to native form before
+        validating and storing — otherwise ``os.path.isdir`` rejects a
+        perfectly real directory."""
+        original = tmp_path / "starting"
+        original.mkdir()
+
+        # Fake Windows for the test
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+
+        with patch.object(
+            LocalEnvironment, "init_session", autospec=True, return_value=None
+        ):
+            env = LocalEnvironment(cwd=str(original), timeout=10)
+
+        # Pretend Git Bash wrote an MSYS path that maps to tmp_path/"next"
+        new_dir = tmp_path / "next"
+        new_dir.mkdir()
+
+        with open(env._cwd_file, "w") as f:
+            f.write("/c/whatever/from/bash")
+
+        # Translate the synthetic MSYS string to the real native dir.
+        def fake_translate(p):
+            if p == "/c/whatever/from/bash":
+                return str(new_dir)
+            return p
+
+        with patch.object(local_mod, "_msys_to_windows_path", side_effect=fake_translate):
+            env._update_cwd({"output": "", "returncode": 0})
+
+        assert env.cwd == str(new_dir)
+
+
+# ---------------------------------------------------------------------------
+# End-to-end: _extract_cwd_from_output rollback when marker is invalid
+# ---------------------------------------------------------------------------
+
+class TestExtractCwdFromOutputWindowsMsys:
+    def test_stale_msys_marker_does_not_clobber_cwd(self, monkeypatch, tmp_path):
+        """When the cwd marker in stdout points at a non-existent path,
+        ``LocalEnvironment._extract_cwd_from_output`` must roll back to
+        the previous cwd instead of propagating a bad value."""
+        original = tmp_path / "starting"
+        original.mkdir()
+
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+
+        with patch.object(
+            LocalEnvironment, "init_session", autospec=True, return_value=None
+        ):
+            env = LocalEnvironment(cwd=str(original), timeout=10)
+
+        marker = env._cwd_marker
+        result = {
+            "output": f"some command output\n{marker}/c/no/such/path{marker}\n",
+            "returncode": 0,
+        }
+
+        # Translation produces a path that doesn't exist on disk → rollback.
+        with patch.object(
+            local_mod,
+            "_msys_to_windows_path",
+            return_value=str(tmp_path / "definitely-does-not-exist"),
+        ):
+            env._extract_cwd_from_output(result)
+
+        assert env.cwd == str(original)
+
+    def test_valid_msys_marker_normalized_to_native(self, monkeypatch, tmp_path):
+        original = tmp_path / "starting"
+        original.mkdir()
+        new_dir = tmp_path / "next"
+        new_dir.mkdir()
+
+        monkeypatch.setattr(local_mod, "_IS_WINDOWS", True)
+
+        with patch.object(
+            LocalEnvironment, "init_session", autospec=True, return_value=None
+        ):
+            env = LocalEnvironment(cwd=str(original), timeout=10)
+
+        marker = env._cwd_marker
+        result = {
+            "output": f"x\n{marker}/c/whatever{marker}\n",
+            "returncode": 0,
+        }
+
+        with patch.object(local_mod, "_msys_to_windows_path", return_value=str(new_dir)):
+            env._extract_cwd_from_output(result)
+
+        assert env.cwd == str(new_dir)
diff --git a/tests/tools/test_tirith_security.py b/tests/tools/test_tirith_security.py
index 20d20ccfa11..ecaf4f4e639 100644
--- a/tests/tools/test_tirith_security.py
+++ b/tests/tools/test_tirith_security.py
@@ -1007,3 +1007,120 @@ class TestHermesHomeIsolation:
             expected = os.path.join(os.path.expanduser("~"), ".hermes")
             result = _get_hermes_home()
         assert result == expected
+
+
+# ---------------------------------------------------------------------------
+# Warn-once dedupe (issue: tirith spawn failed spamming on Windows)
+# ---------------------------------------------------------------------------
+
+class TestSpawnWarningDedup:
+    """When tirith isn't installed yet (background install in flight, or
+    install marked failed), every terminal command spammed an identical
+    ``tirith spawn failed: [WinError 2]`` warning to ``errors.log``. The
+    dedupe set in ``_warn_once`` collapses repeats by ``(exc class, errno)``
+    while still surfacing the first occurrence so users see the failure.
+    """
+
+    @patch("tools.tirith_security.subprocess.run")
+    @patch("tools.tirith_security._load_security_config")
+    def test_repeated_spawn_failure_logs_once(self, mock_cfg, mock_run, caplog):
+        mock_cfg.return_value = {
+            "tirith_enabled": True, "tirith_path": "tirith",
+            "tirith_timeout": 5, "tirith_fail_open": True,
+        }
+        mock_run.side_effect = FileNotFoundError("[WinError 2]")
+        # Fresh dedupe state — clear any keys left by other tests.
+        _tirith_mod._reset_spawn_warning_state()
+
+        with caplog.at_level("WARNING", logger="tools.tirith_security"):
+            for _ in range(15):
+                result = check_command_security("echo hi")
+                # Behavior must remain the same on every call —
+                # fail-open allow, with the exception captured in summary.
+                assert result["action"] == "allow"
+                assert "unavailable" in result["summary"]
+
+        spawn_warnings = [
+            rec for rec in caplog.records
+            if "tirith spawn failed" in rec.message
+        ]
+        assert len(spawn_warnings) == 1, (
+            f"expected exactly 1 spawn-failed warning across 15 commands, "
+            f"got {len(spawn_warnings)}: {[r.message for r in spawn_warnings]}"
+        )
+
+    @patch("tools.tirith_security.subprocess.run")
+    @patch("tools.tirith_security._load_security_config")
+    def test_distinct_exception_types_each_log_once(self, mock_cfg, mock_run, caplog):
+        """``FileNotFoundError`` and ``PermissionError`` are distinct
+        failure modes and each deserves its own first-occurrence log
+        line; the dedupe key includes the exception class."""
+        mock_cfg.return_value = {
+            "tirith_enabled": True, "tirith_path": "tirith",
+            "tirith_timeout": 5, "tirith_fail_open": True,
+        }
+        _tirith_mod._reset_spawn_warning_state()
+
+        with caplog.at_level("WARNING", logger="tools.tirith_security"):
+            mock_run.side_effect = FileNotFoundError("[WinError 2]")
+            for _ in range(3):
+                check_command_security("a")
+            mock_run.side_effect = PermissionError("denied")
+            for _ in range(3):
+                check_command_security("b")
+
+        spawn_warnings = [
+            rec for rec in caplog.records
+            if "tirith spawn failed" in rec.message
+        ]
+        assert len(spawn_warnings) == 2, (
+            f"expected 2 distinct first-occurrence warnings, "
+            f"got {len(spawn_warnings)}"
+        )
+
+    @patch("tools.tirith_security.subprocess.run")
+    @patch("tools.tirith_security._load_security_config")
+    def test_repeated_timeout_logs_once(self, mock_cfg, mock_run, caplog):
+        mock_cfg.return_value = {
+            "tirith_enabled": True, "tirith_path": "tirith",
+            "tirith_timeout": 5, "tirith_fail_open": True,
+        }
+        mock_run.side_effect = subprocess.TimeoutExpired(cmd="tirith", timeout=5)
+        _tirith_mod._reset_spawn_warning_state()
+
+        with caplog.at_level("WARNING", logger="tools.tirith_security"):
+            for _ in range(10):
+                result = check_command_security("slow")
+                assert result["action"] == "allow"
+
+        timeout_warnings = [
+            rec for rec in caplog.records
+            if "tirith timed out" in rec.message
+        ]
+        assert len(timeout_warnings) == 1
+
+    @patch("tools.tirith_security._load_security_config")
+    def test_path_none_logs_once(self, mock_cfg, caplog):
+        """``_resolve_tirith_path`` returning ``None`` (explicit path set
+        but resolver returned None — unusual) should not spam the log
+        either."""
+        mock_cfg.return_value = {
+            "tirith_enabled": True, "tirith_path": "tirith",
+            "tirith_timeout": 5, "tirith_fail_open": True,
+        }
+        _tirith_mod._reset_spawn_warning_state()
+
+        with patch(
+            "tools.tirith_security._resolve_tirith_path", return_value=None
+        ):
+            with caplog.at_level("WARNING", logger="tools.tirith_security"):
+                for _ in range(10):
+                    result = check_command_security("echo")
+                    assert result["action"] == "allow"
+                    assert "tirith path unavailable" in result["summary"]
+
+        none_warnings = [
+            rec for rec in caplog.records
+            if "tirith path resolved to None" in rec.message
+        ]
+        assert len(none_warnings) == 1
diff --git a/tools/environments/local.py b/tools/environments/local.py
index 7aa75a62d0c..3b9d65449fa 100644
--- a/tools/environments/local.py
+++ b/tools/environments/local.py
@@ -18,18 +18,44 @@ _IS_WINDOWS = platform.system() == "Windows"
 logger = logging.getLogger(__name__)
 
 
+def _msys_to_windows_path(cwd: str) -> str:
+    """Translate a Git Bash / MSYS-style POSIX path (``/c/Users/x``) to the
+    native Windows form (``C:\\Users\\x``) so ``os.path.isdir`` and
+    ``subprocess.Popen(..., cwd=...)`` can find it.
+
+    No-ops on non-Windows hosts or for paths that aren't in MSYS form.
+    Returns the input unchanged when no translation applies. This is
+    idempotent — calling it on an already-Windows path returns it as-is.
+    """
+    if not _IS_WINDOWS or not cwd:
+        return cwd
+    # Match leading "/<single letter>/" or exactly "/<letter>" (bare drive root).
+    m = re.match(r'^/([a-zA-Z])(/.*)?$', cwd)
+    if not m:
+        return cwd
+    drive = m.group(1).upper()
+    tail = (m.group(2) or "").replace('/', '\\')
+    return f"{drive}:{tail or chr(92)}"  # chr(92) = backslash, avoid raw-string escape
+
+
 def _resolve_safe_cwd(cwd: str) -> str:
     """Return ``cwd`` if it exists as a directory, else the nearest existing
     ancestor.  Falls back to ``tempfile.gettempdir()`` only if walking up the
     path can't find any existing directory (effectively never on a healthy
     filesystem, but cheap belt-and-braces).
 
+    On Windows, also normalizes Git Bash / MSYS-style POSIX paths
+    (``/c/Users/x``) to native Windows form before the isdir check so a
+    perfectly valid ``pwd -P`` result from bash doesn't get rejected as
+    "missing" (see ``_msys_to_windows_path``).
+
     Used by ``_run_bash`` to recover when the configured cwd is gone — most
     commonly because a previous tool call deleted its own working directory
     (issue #17558).  Without this guard, ``subprocess.Popen(..., cwd=...)``
     raises ``FileNotFoundError`` before bash starts, wedging every subsequent
     terminal call until the gateway restarts.
     """
+    cwd = _msys_to_windows_path(cwd) if _IS_WINDOWS else cwd
     if cwd and os.path.isdir(cwd):
         return cwd
     parent = os.path.dirname(cwd) if cwd else ""
@@ -455,21 +481,27 @@ class LocalEnvironment(BaseEnvironment):
         # (issue #17558).  Popen would otherwise raise FileNotFoundError on
         # the cwd before bash starts, wedging every subsequent call until the
         # gateway restarts.
+        #
+        # On Windows, ``_resolve_safe_cwd`` also normalises Git Bash-style
+        # POSIX paths (``/c/Users/...``) to native form so a perfectly valid
+        # ``pwd -P`` result from bash isn't mistakenly treated as "missing"
+        # and spammed as a warning on every command.
         safe_cwd = _resolve_safe_cwd(self.cwd)
         if safe_cwd != self.cwd:
-            logger.warning(
-                "LocalEnvironment cwd %r is missing on disk; "
-                "falling back to %r so terminal commands keep working.",
-                self.cwd,
-                safe_cwd,
-            )
+            # MSYS → Windows translation alone shouldn't surface as a warning
+            # (it's a benign normalization, not a recovery). Only warn when
+            # the directory really doesn't exist on disk.
+            normalized = _msys_to_windows_path(self.cwd) if _IS_WINDOWS else self.cwd
+            if safe_cwd != normalized:
+                logger.warning(
+                    "LocalEnvironment cwd %r is missing on disk; "
+                    "falling back to %r so terminal commands keep working.",
+                    self.cwd,
+                    safe_cwd,
+                )
             self.cwd = safe_cwd
 
-        # On Windows, self.cwd may be a Git Bash-style path (/c/Users/...)
-        # from pwd output. subprocess.Popen needs a native Windows path.
         _popen_cwd = self.cwd
-        if _IS_WINDOWS and _popen_cwd and re.match(r'^/[a-zA-Z]/', _popen_cwd):
-            _popen_cwd = _popen_cwd[1].upper() + ':' + _popen_cwd[2:].replace('/', '\\')
 
         proc = subprocess.Popen(
             args,
@@ -571,10 +603,19 @@ class LocalEnvironment(BaseEnvironment):
         ``pwd -P`` on a deleted cwd can leave a stale value in the marker
         file, and propagating it would re-wedge the next ``Popen``.  The
         ``_run_bash`` recovery path will resolve a safe fallback if needed.
+
+        On Windows, the value written by Git Bash's ``pwd -P`` is in
+        MSYS form (``/c/Users/x``). Translate it to native Windows form
+        before validating with ``os.path.isdir`` and before storing on
+        ``self.cwd``; otherwise the isdir check rejects every valid
+        result and ``_run_bash`` later prints a misleading "cwd is
+        missing" warning on every command.
         """
         try:
             with open(self._cwd_file, encoding="utf-8") as f:
                 cwd_path = f.read().strip()
+            if _IS_WINDOWS:
+                cwd_path = _msys_to_windows_path(cwd_path)
             if cwd_path and os.path.isdir(cwd_path):
                 self.cwd = cwd_path
         except (OSError, FileNotFoundError):
@@ -583,6 +624,30 @@ class LocalEnvironment(BaseEnvironment):
         # Still strip the marker from output so it's not visible
         self._extract_cwd_from_output(result)
 
+    def _extract_cwd_from_output(self, result: dict):
+        """Same semantics as the base class, but on Windows the value
+        emitted by ``pwd -P`` inside Git Bash is in MSYS form
+        (``/c/Users/x``). Normalize to native Windows form and validate
+        the directory exists before assigning to ``self.cwd`` — otherwise
+        ``_run_bash``'s safe-cwd recovery would warn on every subsequent
+        command.
+
+        Always defers to the base class for stripping the marker text from
+        ``result["output"]`` so output formatting is identical.
+        """
+        # Snapshot pre-existing cwd, defer to base for parsing + marker
+        # stripping, then validate / normalize whatever it assigned.
+        prev_cwd = self.cwd
+        super()._extract_cwd_from_output(result)
+        if self.cwd != prev_cwd:
+            normalized = _msys_to_windows_path(self.cwd) if _IS_WINDOWS else self.cwd
+            if normalized and os.path.isdir(normalized):
+                self.cwd = normalized
+            else:
+                # Stale / non-existent path — keep previous cwd; _run_bash
+                # will resolve a safe fallback on the next call if needed.
+                self.cwd = prev_cwd
+
     def cleanup(self):
         """Clean up temp files."""
         for f in (self._snapshot_path, self._cwd_file):
diff --git a/tools/tirith_security.py b/tools/tirith_security.py
index 350265d33a1..1c79892f424 100644
--- a/tools/tirith_security.py
+++ b/tools/tirith_security.py
@@ -101,6 +101,34 @@ _install_failure_reason: str = ""  # reason tag when _resolved_path is _INSTALL_
 _install_lock = threading.Lock()
 _install_thread: threading.Thread | None = None
 
+# Warning de-duplication. The spawn/path warnings live in the hot path —
+# without this dedupe set, a Windows install where ``tirith`` isn't on PATH
+# (e.g. background install thread still running, or install marked failed)
+# spams ``tirith spawn failed: [WinError 2]...`` once per terminal command,
+# easily filling errors.log with hundreds of identical lines.
+_warned_messages: set[str] = set()
+_warned_lock = threading.Lock()
+
+
+def _warn_once(key: str, message: str, *args) -> None:
+    """``logger.warning`` but at-most-once per ``key`` for the process
+    lifetime. Used to avoid drowning the log when a fail-open tirith
+    misconfiguration fires on every command."""
+    with _warned_lock:
+        if key in _warned_messages:
+            return
+        _warned_messages.add(key)
+    logger.warning(message, *args)
+
+
+def _reset_spawn_warning_state() -> None:
+    """Clear the warn-once dedupe set. Called when tirith is freshly
+    (re)installed so a subsequent failure surfaces again — e.g. user
+    deletes the binary mid-session.
+    """
+    with _warned_lock:
+        _warned_messages.clear()
+
 # Disk-persistent failure marker — avoids retry across process restarts
 _MARKER_TTL = 86400  # 24 hours
 
@@ -168,6 +196,10 @@ def _mark_install_failed(reason: str = ""):
 
 def _clear_install_failed():
     """Remove the failure marker after successful install."""
+    # Reset the warn-once dedupe set so a subsequent failure (e.g. user
+    # deletes the binary) surfaces in the log again instead of being
+    # silently suppressed by a stale dedupe key from before the fix.
+    _reset_spawn_warning_state()
     try:
         os.unlink(_failure_marker_path())
     except OSError:
@@ -632,7 +664,10 @@ def check_command_security(command: str) -> dict:
     fail_open = cfg["tirith_fail_open"]
 
     if tirith_path is None:
-        logger.warning("tirith path resolved to None; scanning disabled")
+        _warn_once(
+            "tirith_path_none",
+            "tirith path resolved to None; scanning disabled",
+        )
         if fail_open:
             return {"action": "allow", "findings": [], "summary": "tirith path unavailable"}
         return {"action": "block", "findings": [], "summary": "tirith path unavailable (fail-closed)"}
@@ -646,13 +681,23 @@ def check_command_security(command: str) -> dict:
             timeout=timeout,
         )
     except OSError as exc:
-        # Covers FileNotFoundError, PermissionError, exec format error
-        logger.warning("tirith spawn failed: %s", exc)
+        # Covers FileNotFoundError, PermissionError, exec format error.
+        # Dedupe by ``(errno, exc class)`` so a transient failure mode
+        # surfaces once but doesn't drown the log on every command —
+        # commonly seen on Windows when the configured path "tirith"
+        # isn't on PATH yet (background install still running, or
+        # install marked failed for the day).
+        spawn_key = f"tirith_spawn_failed:{type(exc).__name__}:{getattr(exc, 'errno', '')}"
+        _warn_once(spawn_key, "tirith spawn failed: %s", exc)
         if fail_open:
             return {"action": "allow", "findings": [], "summary": f"tirith unavailable: {exc}"}
         return {"action": "block", "findings": [], "summary": f"tirith spawn failed (fail-closed): {exc}"}
     except subprocess.TimeoutExpired:
-        logger.warning("tirith timed out after %ds", timeout)
+        _warn_once(
+            f"tirith_timeout:{timeout}",
+            "tirith timed out after %ds",
+            timeout,
+        )
         if fail_open:
             return {"action": "allow", "findings": [], "summary": f"tirith timed out ({timeout}s)"}
         return {"action": "block", "findings": [], "summary": "tirith timed out (fail-closed)"}

From 31ba2b0cbcac310f7aa2db3c8885e37f2e2e37fb Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 16:35:12 -0700
Subject: [PATCH 089/218] fix(xai-oauth): recover from prelude SSE errors, gate
 reasoning replay, surface entitlement 403s (#26644)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three fixes for the May 2026 xAI OAuth (SuperGrok / X Premium) rollout
failures:

- _run_codex_stream: when openai SDK raises RuntimeError("Expected to
  have received `response.created` before `<type>`"), retry once then
  fall back to responses.create(stream=True) — same path used for
  missing-response.completed postlude.  Fallback surfaces the real
  provider error with body+status_code intact.  Also fixes #8133
  (response.in_progress prelude on custom relays) and #14634
  (codex.rate_limits prelude on codex-lb).

- _summarize_api_error: when error body matches xAI's entitlement
  shape, append a one-line hint pointing to https://grok.com and
  /model.  Once-only, applies to both auxiliary warnings and
  main-loop error surfacing.

- _chat_messages_to_responses_input: new is_xai_responses kwarg
  drops replayed codex_reasoning_items (encrypted_content) before
  they reach xAI.  Also drops reasoning.encrypted_content from the
  xAI include array.  Native Codex behavior unchanged.  Grok still
  reasons natively each turn; coherence rides on visible message
  text alone.

Closes #8133, #14634.
---
 agent/codex_responses_adapter.py              |  27 +-
 agent/transports/codex.py                     |  19 +-
 run_agent.py                                  |  81 +++-
 .../agent/transports/test_codex_transport.py  |  21 +-
 .../test_codex_xai_oauth_recovery.py          | 351 ++++++++++++++++++
 5 files changed, 481 insertions(+), 18 deletions(-)
 create mode 100644 tests/run_agent/test_codex_xai_oauth_recovery.py

diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index 00345f054e8..6fe9dc5bc64 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed
     return default
 
 
-def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Convert internal chat-style messages to Responses input items."""
+def _chat_messages_to_responses_input(
+    messages: List[Dict[str, Any]],
+    *,
+    is_xai_responses: bool = False,
+) -> List[Dict[str, Any]]:
+    """Convert internal chat-style messages to Responses input items.
+
+    ``is_xai_responses=True`` strips ``encrypted_content`` from replayed
+    reasoning items.  xAI's OAuth/SuperGrok ``/v1/responses`` surface
+    rejects encrypted reasoning blobs minted by prior turns: the request
+    streams an ``error`` SSE frame before ``response.created`` and the
+    OpenAI SDK collapses it into a generic stream-ordering error.  Native
+    Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
+    — keep the default off.
+    """
     items: List[Dict[str, Any]] = []
     seen_item_ids: set = set()
 
@@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
             if role == "assistant":
                 # Replay encrypted reasoning items from previous turns
                 # so the API can maintain coherent reasoning chains.
+                #
+                # xAI OAuth (SuperGrok/Premium) rejects replayed
+                # ``encrypted_content`` reasoning items minted by prior
+                # turns — see _chat_messages_to_responses_input docstring.
+                # When ``is_xai_responses`` is set we drop the replay
+                # entirely; Grok still reasons on each turn server-side,
+                # we just don't try to thread the prior turn's encrypted
+                # blob back in.
                 codex_reasoning = msg.get("codex_reasoning_items")
                 has_codex_reasoning = False
-                if isinstance(codex_reasoning, list):
+                if isinstance(codex_reasoning, list) and not is_xai_responses:
                     for ri in codex_reasoning:
                         if isinstance(ri, dict) and ri.get("encrypted_content"):
                             item_id = ri.get("id")
diff --git a/agent/transports/codex.py b/agent/transports/codex.py
index cfd9f128778..3661ea17a3e 100644
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport):
     def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
         """Convert OpenAI chat messages to Responses API input items."""
         from agent.codex_responses_adapter import _chat_messages_to_responses_input
-        return _chat_messages_to_responses_input(messages)
+        return _chat_messages_to_responses_input(
+            messages,
+            is_xai_responses=bool(kwargs.get("is_xai_responses")),
+        )
 
     def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
         """Convert OpenAI tool schemas to Responses API function definitions."""
@@ -93,7 +96,10 @@ class ResponsesApiTransport(ProviderTransport):
         kwargs = {
             "model": model,
             "instructions": instructions,
-            "input": _chat_messages_to_responses_input(payload_messages),
+            "input": _chat_messages_to_responses_input(
+                payload_messages,
+                is_xai_responses=is_xai_responses,
+            ),
             "tools": response_tools,
             "store": False,
         }
@@ -110,7 +116,14 @@ class ResponsesApiTransport(ProviderTransport):
         if reasoning_enabled and is_xai_responses:
             from agent.model_metadata import grok_supports_reasoning_effort
 
-            kwargs["include"] = ["reasoning.encrypted_content"]
+            # NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
+            # any more.  xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
+            # replayed encrypted reasoning items on turn 2+ — see
+            # _chat_messages_to_responses_input docstring.  Requesting the field
+            # back would just have us cache something we then must strip.  Grok
+            # still reasons natively each turn; coherence across turns rides on
+            # the visible message text alone.
+            kwargs["include"] = []
             # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
             # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
             # those models reason natively. Only send the effort dial when
diff --git a/run_agent.py b/run_agent.py
index 7e42beb3eba..2b20d48ede2 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4966,6 +4966,45 @@ class AIAgent:
         trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
         _save_trajectory_to_file(trajectory, self.model, completed)
 
+    @staticmethod
+    def _decorate_xai_entitlement_error(detail: str) -> str:
+        """Append a friendly hint when xAI's OAuth surface returns an
+        entitlement-shaped error.
+
+        xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a
+        SuperGrok / X Premium subscription with HTTP 403 carrying a body like::
+
+            {"code": "The caller does not have permission to execute the
+             specified operation", "error": "You have either run out of
+             available resources or do not have an active Grok subscription.
+             Manage subscriptions at https://grok.com/..."}
+
+        The raw text is useful but the action the user needs to take (subscribe
+        on grok.com, or switch providers with ``/model``) isn't obvious from
+        the wire format.  Detect the entitlement shape and append a hint.
+
+        Matched once per detail string — won't double-decorate if the upstream
+        already concatenated the same text.
+        """
+        if not detail:
+            return detail
+        lower = detail.lower()
+        is_entitlement = (
+            "do not have an active grok subscription" in lower
+            or ("out of available resources" in lower and "grok" in lower)
+            or ("does not have permission" in lower and "grok" in lower)
+        )
+        if not is_entitlement:
+            return detail
+        hint = (
+            " — xAI OAuth account lacks SuperGrok / X Premium entitlement for "
+            "this model. Subscribe at https://grok.com or run `/model` to "
+            "switch providers."
+        )
+        if hint.strip() in detail:
+            return detail
+        return f"{detail}{hint}"
+
     @staticmethod
     def _summarize_api_error(error: Exception) -> str:
         """Extract a human-readable one-liner from an API error.
@@ -4999,12 +5038,12 @@ class AIAgent:
             if msg:
                 status_code = getattr(error, "status_code", None)
                 prefix = f"HTTP {status_code}: " if status_code else ""
-                return f"{prefix}{msg[:300]}"
+                return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")
 
         # Fallback: truncate the raw string but give more room than 200 chars
         status_code = getattr(error, "status_code", None)
         prefix = f"HTTP {status_code}: " if status_code else ""
-        return f"{prefix}{raw[:500]}"
+        return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}")
 
     def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
         if not key:
@@ -7056,18 +7095,48 @@ class AIAgent:
             except RuntimeError as exc:
                 err_text = str(exc)
                 missing_completed = "response.completed" in err_text
-                if missing_completed and attempt < max_stream_retries:
+                # The OpenAI SDK's Responses streaming state machine raises
+                # ``RuntimeError("Expected to have received `response.created`
+                # before `<event-type>`")`` when the first SSE event from the
+                # server is anything other than ``response.created`` — and it
+                # discards the event's payload before we can read it.  Three
+                # real-world backends emit a different first frame:
+                #
+                #   * xAI on grok-4.x OAuth — sends ``error`` (issues
+                #     reported around the May 2026 SuperGrok rollout when
+                #     multi-turn conversations replay encrypted reasoning
+                #     content the OAuth tier rejects)
+                #   * codex-lb relays — send ``codex.rate_limits`` (#14634)
+                #   * custom Responses relays — send ``response.in_progress``
+                #     (#8133)
+                #
+                # In all three cases the underlying byte stream is still
+                # readable: a non-stream ``responses.create(stream=True)``
+                # fallback succeeds and surfaces the real provider error as
+                # a normal exception with body+status_code attached, which
+                # ``_summarize_api_error`` can then translate into a useful
+                # user-facing line.  Treat ``response.created`` prelude
+                # errors the same way we already treat ``response.completed``
+                # postlude errors.
+                prelude_error = (
+                    "Expected to have received `response.created`" in err_text
+                    or "Expected to have received \"response.created\"" in err_text
+                )
+                if (missing_completed or prelude_error) and attempt < max_stream_retries:
                     logger.debug(
-                        "Responses stream closed before completion (attempt %s/%s); retrying. %s",
+                        "Responses stream %s (attempt %s/%s); retrying. %s",
+                        "prelude rejected" if prelude_error else "closed before completion",
                         attempt + 1,
                         max_stream_retries + 1,
                         self._client_log_context(),
                     )
                     continue
-                if missing_completed:
+                if missing_completed or prelude_error:
                     logger.debug(
-                        "Responses stream did not emit response.completed; falling back to create(stream=True). %s",
+                        "Responses stream %s; falling back to create(stream=True). %s err=%s",
+                        "rejected before response.created" if prelude_error else "did not emit response.completed",
                         self._client_log_context(),
+                        err_text,
                     )
                     return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
                 raise
diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py
index ad70167b09f..82251823790 100644
--- a/tests/agent/transports/test_codex_transport.py
+++ b/tests/agent/transports/test_codex_transport.py
@@ -194,9 +194,16 @@ class TestCodexBuildKwargs:
             is_xai_responses=True,
             reasoning_config={"effort": "high"},
         )
-        # xAI Responses must receive both encrypted reasoning content and the effort
+        # xAI Responses receives reasoning.effort on the allowlisted models.
         assert kw.get("reasoning") == {"effort": "high"}
-        assert "reasoning.encrypted_content" in kw.get("include", [])
+        # As of May 2026 we deliberately do NOT request
+        # reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
+        # surface rejects replayed encrypted reasoning items on turn 2+
+        # (the multi-turn "Expected to have received response.created
+        # before error" failure).  Grok still reasons natively each turn;
+        # we just don't try to thread the prior turn's encrypted blob back
+        # in.  See tests/run_agent/test_codex_xai_oauth_recovery.py.
+        assert "reasoning.encrypted_content" not in kw.get("include", [])
 
     def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
         messages = [{"role": "user", "content": "Hi"}]
@@ -222,8 +229,9 @@ class TestCodexBuildKwargs:
     # api.x.ai 400s with "Model X does not support parameter reasoningEffort"
     # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
     # Those models reason natively but don't expose the dial. The transport
-    # must omit the `reasoning` key for them while keeping the encrypted
-    # reasoning content include so we can capture native reasoning tokens.
+    # must omit the `reasoning` key for them.  As of May 2026 we also no
+    # longer request ``reasoning.encrypted_content`` back from xAI on ANY
+    # model — see test_xai_reasoning_effort_passed for the rationale.
 
     def test_xai_grok_4_omits_reasoning_effort(self, transport):
         """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
@@ -237,8 +245,9 @@ class TestCodexBuildKwargs:
             assert "reasoning" not in kw, (
                 f"{model} must not receive a reasoning key (xAI rejects it)"
             )
-            # Still capture native reasoning tokens
-            assert "reasoning.encrypted_content" in kw.get("include", [])
+            # We no longer ask xAI for encrypted_content back (see comment
+            # above) — verify the include list is empty.
+            assert "reasoning.encrypted_content" not in kw.get("include", [])
 
     def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
         """grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
new file mode 100644
index 00000000000..0f3603d2ca7
--- /dev/null
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -0,0 +1,351 @@
+"""Regression tests for the May 2026 xAI OAuth (SuperGrok / X Premium) bugs.
+
+Three distinct failure modes the user community hit during rollout:
+
+1. ``RuntimeError("Expected to have received `response.created` before
+   `error`")`` on multi-turn xAI OAuth conversations.  The OpenAI SDK's
+   Responses streaming state machine collapses an upstream ``error`` SSE
+   frame into a generic stream-ordering error.  ``_run_codex_stream``
+   now treats this the same way it already treats the missing
+   ``response.completed`` postlude — fall back to a non-stream
+   ``responses.create(stream=True)`` which surfaces the real provider
+   error.  Also closes #8133 (``response.in_progress`` prelude on custom
+   relays) and #14634 (``codex.rate_limits`` prelude on codex-lb).
+
+2. The HTTP 403 entitlement error xAI returns when an OAuth token lacks
+   SuperGrok / X Premium ("You have either run out of available
+   resources or do not have an active Grok subscription") used to read
+   as a confusing wall of JSON.  ``_summarize_api_error`` now appends a
+   one-line hint pointing the user at https://grok.com and ``/model``.
+
+3. Multi-turn replay of ``codex_reasoning_items`` (with
+   ``encrypted_content``) is now suppressed for ``is_xai_responses=True``
+   in ``_chat_messages_to_responses_input``.  xAI's OAuth/SuperGrok
+   surface rejects replayed encrypted reasoning items; Grok still
+   reasons natively each turn, so coherence rides on visible message
+   text.
+"""
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Fix A: prelude error fallback
+# ---------------------------------------------------------------------------
+
+
+def _make_codex_agent():
+    """Build a minimal AIAgent wired for codex_responses streaming tests."""
+    from run_agent import AIAgent
+
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://api.x.ai/v1",
+        model="grok-4.3",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    agent.api_mode = "codex_responses"
+    agent.provider = "xai-oauth"
+    agent._interrupt_requested = False
+    return agent
+
+
+@pytest.mark.parametrize(
+    "prelude_event_type",
+    [
+        "error",                  # xAI OAuth multi-turn
+        "codex.rate_limits",      # codex-lb relays (#14634)
+        "response.in_progress",   # custom Responses relays (#8133)
+    ],
+)
+def test_codex_stream_prelude_error_falls_back_to_create_stream(prelude_event_type):
+    """The SDK's prelude RuntimeError must trigger the non-stream fallback.
+
+    When the first SSE event isn't ``response.created``, openai-python
+    raises RuntimeError before our event loop sees anything.  We must
+    detect that, retry once, then fall back to ``create(stream=True)``
+    which surfaces the real provider error or a real response.
+    """
+    agent = _make_codex_agent()
+
+    prelude_error = RuntimeError(
+        f"Expected to have received `response.created` before `{prelude_event_type}`"
+    )
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = prelude_error
+
+    fallback_response = SimpleNamespace(
+        output=[SimpleNamespace(
+            type="message",
+            content=[SimpleNamespace(type="output_text", text="fallback ok")],
+        )],
+        status="completed",
+    )
+
+    with patch.object(
+        agent, "_run_codex_create_stream_fallback", return_value=fallback_response
+    ) as mock_fallback:
+        result = agent._run_codex_stream({}, client=mock_client)
+
+    assert result is fallback_response
+    mock_fallback.assert_called_once_with({}, client=mock_client)
+
+
+def test_codex_stream_prelude_error_retries_once_before_fallback():
+    """The retry path must fire one extra stream attempt before falling back."""
+    agent = _make_codex_agent()
+
+    call_count = {"n": 0}
+
+    def stream_side_effect(**kwargs):
+        call_count["n"] += 1
+        raise RuntimeError(
+            "Expected to have received `response.created` before `error`"
+        )
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = stream_side_effect
+
+    fallback_response = SimpleNamespace(output=[], status="completed")
+    with patch.object(
+        agent, "_run_codex_create_stream_fallback", return_value=fallback_response
+    ) as mock_fallback:
+        agent._run_codex_stream({}, client=mock_client)
+
+    # max_stream_retries=1 → one retry + final attempt → 2 stream calls,
+    # THEN the fallback path runs.
+    assert call_count["n"] == 2
+    mock_fallback.assert_called_once()
+
+
+def test_codex_stream_unrelated_runtimeerror_still_raises():
+    """RuntimeErrors that aren't prelude/postlude shape must propagate."""
+    agent = _make_codex_agent()
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = RuntimeError("something else broke")
+
+    with patch.object(agent, "_run_codex_create_stream_fallback") as mock_fallback:
+        with pytest.raises(RuntimeError, match="something else broke"):
+            agent._run_codex_stream({}, client=mock_client)
+
+    mock_fallback.assert_not_called()
+
+
+def test_codex_stream_postlude_error_still_falls_back():
+    """Existing ``response.completed`` fallback must not regress."""
+    agent = _make_codex_agent()
+
+    mock_client = MagicMock()
+    mock_client.responses.stream.side_effect = RuntimeError(
+        "Didn't receive a `response.completed` event."
+    )
+
+    fallback_response = SimpleNamespace(output=[], status="completed")
+    with patch.object(
+        agent, "_run_codex_create_stream_fallback", return_value=fallback_response
+    ) as mock_fallback:
+        result = agent._run_codex_stream({}, client=mock_client)
+
+    assert result is fallback_response
+    mock_fallback.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# Fix B: friendly entitlement message
+# ---------------------------------------------------------------------------
+
+
+def test_summarize_api_error_decorates_xai_entitlement_403():
+    """xAI's OAuth 403 must end with the subscribe-or-switch hint."""
+    from run_agent import AIAgent
+
+    error = RuntimeError(
+        "HTTP 403: Error code: 403 - {'code': 'The caller does not have permission "
+        "to execute the specified operation', 'error': 'You have either run out of "
+        "available resources or do not have an active Grok subscription. Manage "
+        "subscriptions at https://grok.com'}"
+    )
+    summary = AIAgent._summarize_api_error(error)
+    assert "do not have an active Grok subscription" in summary
+    assert "SuperGrok" in summary
+    assert "/model" in summary
+    assert "https://grok.com" in summary
+
+
+def test_summarize_api_error_decorates_xai_body_message():
+    """SDK-style error with structured body must also get the hint."""
+    from run_agent import AIAgent
+
+    class _XaiErr(Exception):
+        status_code = 403
+        body = {
+            "error": {
+                "message": (
+                    "You have either run out of available resources or do "
+                    "not have an active Grok subscription. Manage at "
+                    "https://grok.com"
+                )
+            }
+        }
+
+    summary = AIAgent._summarize_api_error(_XaiErr("403"))
+    assert "HTTP 403" in summary
+    assert "SuperGrok / X Premium" in summary
+
+
+def test_summarize_api_error_idempotent_for_entitlement_hint():
+    """Decorating twice must not double up the hint."""
+    from run_agent import AIAgent
+
+    raw = "HTTP 403: do not have an active Grok subscription"
+    once = AIAgent._decorate_xai_entitlement_error(raw)
+    twice = AIAgent._decorate_xai_entitlement_error(once)
+    assert once == twice
+
+
+def test_summarize_api_error_passes_through_unrelated_errors():
+    """Non-xAI / non-entitlement errors must not be touched."""
+    from run_agent import AIAgent
+
+    error = RuntimeError("HTTP 500: upstream is sad")
+    summary = AIAgent._summarize_api_error(error)
+    assert "SuperGrok" not in summary
+    assert "grok.com" not in summary
+    assert "upstream is sad" in summary
+
+
+# ---------------------------------------------------------------------------
+# Fix C: reasoning replay gating for xai-oauth
+# ---------------------------------------------------------------------------
+
+
+def _assistant_msg_with_encrypted_reasoning(text="hi from grok", encrypted="enc_blob"):
+    return {
+        "role": "assistant",
+        "content": text,
+        "codex_reasoning_items": [
+            {
+                "type": "reasoning",
+                "id": "rs_xai_001",
+                "encrypted_content": encrypted,
+                "summary": [],
+            }
+        ],
+    }
+
+
+def test_codex_reasoning_replay_default_includes_encrypted_content():
+    """Native Codex backend (default) must still replay encrypted reasoning."""
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
+
+    msgs = [
+        {"role": "user", "content": "hi"},
+        _assistant_msg_with_encrypted_reasoning(),
+        {"role": "user", "content": "what's your name?"},
+    ]
+
+    items = _chat_messages_to_responses_input(msgs)
+    reasoning = [it for it in items if it.get("type") == "reasoning"]
+    assert len(reasoning) == 1
+    assert reasoning[0]["encrypted_content"] == "enc_blob"
+
+
+def test_codex_reasoning_replay_stripped_for_xai_oauth():
+    """xAI OAuth surface must NOT receive replayed encrypted reasoning."""
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
+
+    msgs = [
+        {"role": "user", "content": "hi"},
+        _assistant_msg_with_encrypted_reasoning(),
+        {"role": "user", "content": "what's your name?"},
+    ]
+
+    items = _chat_messages_to_responses_input(msgs, is_xai_responses=True)
+    reasoning = [it for it in items if it.get("type") == "reasoning"]
+    assert reasoning == []
+
+    # The assistant's visible text must still survive — coherence across
+    # turns rides on the message text alone.
+    assistant_items = [
+        it for it in items
+        if it.get("role") == "assistant" or it.get("type") == "message"
+    ]
+    assert assistant_items, "assistant message must still be present"
+
+
+def test_codex_transport_xai_request_omits_encrypted_content_include():
+    """Verify the xAI ``include`` array no longer requests encrypted reasoning."""
+    from agent.transports.codex import ResponsesApiTransport
+
+    transport = ResponsesApiTransport()
+    kwargs = transport.build_kwargs(
+        model="grok-4.3",
+        messages=[
+            {"role": "system", "content": "you are a helpful assistant"},
+            {"role": "user", "content": "hi"},
+        ],
+        tools=None,
+        instructions="you are a helpful assistant",
+        reasoning_config={"enabled": True, "effort": "medium"},
+        is_xai_responses=True,
+    )
+    # Without this gate, xAI would echo back encrypted_content blobs we'd
+    # then store in codex_reasoning_items and replay next turn — which is
+    # exactly the multi-turn failure mode we're closing.
+    assert kwargs["include"] == []
+
+
+def test_codex_transport_xai_strips_replayed_reasoning_in_input():
+    """End-to-end: build_kwargs on xai-oauth must strip prior reasoning."""
+    from agent.transports.codex import ResponsesApiTransport
+
+    transport = ResponsesApiTransport()
+    kwargs = transport.build_kwargs(
+        model="grok-4.3",
+        messages=[
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hi"},
+            _assistant_msg_with_encrypted_reasoning(text="hi from grok"),
+            {"role": "user", "content": "what's your name?"},
+        ],
+        tools=None,
+        instructions="sys",
+        reasoning_config={"enabled": True, "effort": "medium"},
+        is_xai_responses=True,
+    )
+    input_items = kwargs["input"]
+    reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
+    assert reasoning_items == []
+
+
+def test_codex_transport_native_codex_still_replays_reasoning_in_input():
+    """Regression guard: openai-codex must keep the existing replay path."""
+    from agent.transports.codex import ResponsesApiTransport
+
+    transport = ResponsesApiTransport()
+    kwargs = transport.build_kwargs(
+        model="gpt-5-codex",
+        messages=[
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hi"},
+            _assistant_msg_with_encrypted_reasoning(text="hi from codex"),
+            {"role": "user", "content": "next"},
+        ],
+        tools=None,
+        instructions="sys",
+        reasoning_config={"enabled": True, "effort": "medium"},
+        is_xai_responses=False,
+    )
+    input_items = kwargs["input"]
+    reasoning_items = [it for it in input_items if it.get("type") == "reasoning"]
+    assert len(reasoning_items) == 1
+    assert reasoning_items[0]["encrypted_content"] == "enc_blob"
+    # Native Codex still asks for encrypted_content back.
+    assert "reasoning.encrypted_content" in kwargs.get("include", [])

From 068c24f8a4203e86de32b0d84ccaf047e8cd6ef7 Mon Sep 17 00:00:00 2001
From: twebefy <twebefy@gmail.com>
Date: Sat, 25 Apr 2026 00:46:10 +0800
Subject: [PATCH 090/218] feat(deepseek): add thinking.type + reasoning_effort
 mapping for DeepSeek API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DeepSeek's thinking mode requires both:
- extra_body.thinking.type: "enabled" to activate thinking mode
- top-level reasoning_effort: "max" or "high" to control depth

Previously, the ChatCompletionsTransport only handled Kimi's thinking
mode — DeepSeek was left unmapped, so reasoning_effort config was
silently dropped.

This patch:
1. Adds is_deepseek: bool to the Params dataclass, detected by
   base_url matching api.deepseek.com
2. Maps Hermes effort levels (xhigh/max → "max", low/medium/high →
   themselves) to the top-level reasoning_effort parameter
3. Sets extra_body.thinking.type alongside the effort
4. Strips reasoning_content from assistant messages sent back to
   DeepSeek, preventing 400 errors when thinking was enabled
---
 agent/transports/chat_completions.py | 20 ++++++++++++++++++++
 run_agent.py                         |  7 +++++++
 2 files changed, 27 insertions(+)

diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 7edb69e42c7..1ae584e9159 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -189,6 +189,7 @@ class ChatCompletionsTransport(ProviderTransport):
             is_kimi: bool
             is_tokenhub: bool
             is_lmstudio: bool
+            is_deepseek: bool
             is_custom_provider: bool
             ollama_num_ctx: int | None
             # Provider routing
@@ -348,6 +349,25 @@ class ChatCompletionsTransport(ProviderTransport):
                 "type": "enabled" if _kimi_thinking_enabled else "disabled",
             }
 
+        # DeepSeek extra_body.thinking + top-level reasoning_effort
+        is_deepseek = params.get("is_deepseek", False)
+        if is_deepseek:
+            _ds_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _ds_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _ds_thinking_enabled else "disabled",
+            }
+            # DeepSeek effort: low/medium→high, high→high, xhigh/max→max
+            if _ds_thinking_enabled and reasoning_config:
+                _e = (reasoning_config.get("effort") or "").strip().lower()
+                if _e in ("xhigh", "max"):
+                    api_kwargs["reasoning_effort"] = "max"
+                elif _e in ("low", "medium", "high"):
+                    api_kwargs["reasoning_effort"] = _e
+            # If no effort configured, don't set it → DeepSeek defaults to high
+
         # Reasoning. LM Studio is handled above via top-level reasoning_effort,
         # so skip emitting extra_body.reasoning for it.
         if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
diff --git a/run_agent.py b/run_agent.py
index 2b20d48ede2..c9aa3157170 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9798,6 +9798,7 @@ class AIAgent:
         )
         _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com")
         _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio"
+        _is_deepseek = base_url_host_matches(self.base_url, "api.deepseek.com")
 
         # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
         # sentinel (temperature omitted entirely), a numeric override, or None.
@@ -9909,6 +9910,7 @@ class AIAgent:
             is_kimi=_is_kimi,
             is_tokenhub=_is_tokenhub,
             is_lmstudio=_is_lmstudio,
+            is_deepseek=_is_deepseek,
             is_custom_provider=self.provider == "custom",
             ollama_num_ctx=self._ollama_num_ctx,
             provider_preferences=_prefs or None,
@@ -10368,6 +10370,11 @@ class AIAgent:
         # context compaction).  Don't pass null to the API.
         api_msg.pop("reasoning_content", None)
 
+        # DeepSeek: strip reasoning_content on all assistant messages so the API
+        # doesn't return 400 when the model was invoked with thinking enabled.
+        if base_url_host_matches(self.base_url, "api.deepseek.com"):
+            api_msg.pop("reasoning_content", None)
+
     @staticmethod
     def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
         """Strip Codex Responses API fields from tool_calls for strict providers.

From cd9470f41638bd515db096cd934c463205790110 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 16:39:18 -0700
Subject: [PATCH 091/218] fix(deepseek): wire thinking-mode via
 DeepSeekProfile, not legacy fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cherry-picked PR #15251 from @tw2818 correctly identified the
DeepSeek 400 root cause but placed the fix in the legacy fallback path
of `build_kwargs`, which DeepSeek never reaches — DeepSeek has a
registered ProviderProfile and goes through `_build_kwargs_from_profile`
instead. The legacy-path block was therefore dead code.

This commit pivots the fix to where it actually fires:

- New `DeepSeekProfile` in `plugins/model-providers/deepseek/__init__.py`
  overrides `build_api_kwargs_extras` to emit DeepSeek's expected wire
  format (mirrors `KimiProfile`):

      {"reasoning_effort": "<low|medium|high|max>",
       "extra_body": {"thinking": {"type": "enabled" | "disabled"}}}

- Model gating: only `deepseek-v4-*` and `deepseek-reasoner` emit
  thinking control. `deepseek-chat` (V3) is untouched — current behavior.

- Effort mapping: low/medium/high passthrough, xhigh/max → max, unset →
  omitted (DeepSeek server applies its own default).

- Revert the legacy-path additions from PR #15251 — they were dead code,
  and the `_copy_reasoning_content_for_api` strip block specifically
  would have nullified the existing reasoning_content padding machinery
  (`_needs_deepseek_tool_reasoning` → space-pad on replay) that the
  active provider already relies on for replay correctness.

- Unit tests pin the wire-shape contract and the model gating rules
  (26 tests, all passing). Existing transport + provider profile suites
  (321 tests) continue to pass.

- AUTHOR_MAP: map twebefy@gmail.com → tw2818 for release notes credit.

Closes #15700, #17212, #17825.
Co-authored-by: tw2818 <twebefy@gmail.com>
---
 agent/transports/chat_completions.py          |  20 --
 plugins/model-providers/deepseek/__init__.py  |  83 +++++++-
 run_agent.py                                  |   7 -
 scripts/release.py                            |   1 +
 .../model_providers/test_deepseek_profile.py  | 184 ++++++++++++++++++
 5 files changed, 266 insertions(+), 29 deletions(-)
 create mode 100644 tests/plugins/model_providers/test_deepseek_profile.py

diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 1ae584e9159..7edb69e42c7 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -189,7 +189,6 @@ class ChatCompletionsTransport(ProviderTransport):
             is_kimi: bool
             is_tokenhub: bool
             is_lmstudio: bool
-            is_deepseek: bool
             is_custom_provider: bool
             ollama_num_ctx: int | None
             # Provider routing
@@ -349,25 +348,6 @@ class ChatCompletionsTransport(ProviderTransport):
                 "type": "enabled" if _kimi_thinking_enabled else "disabled",
             }
 
-        # DeepSeek extra_body.thinking + top-level reasoning_effort
-        is_deepseek = params.get("is_deepseek", False)
-        if is_deepseek:
-            _ds_thinking_enabled = True
-            if reasoning_config and isinstance(reasoning_config, dict):
-                if reasoning_config.get("enabled") is False:
-                    _ds_thinking_enabled = False
-            extra_body["thinking"] = {
-                "type": "enabled" if _ds_thinking_enabled else "disabled",
-            }
-            # DeepSeek effort: low/medium→high, high→high, xhigh/max→max
-            if _ds_thinking_enabled and reasoning_config:
-                _e = (reasoning_config.get("effort") or "").strip().lower()
-                if _e in ("xhigh", "max"):
-                    api_kwargs["reasoning_effort"] = "max"
-                elif _e in ("low", "medium", "high"):
-                    api_kwargs["reasoning_effort"] = _e
-            # If no effort configured, don't set it → DeepSeek defaults to high
-
         # Reasoning. LM Studio is handled above via top-level reasoning_effort,
         # so skip emitting extra_body.reasoning for it.
         if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
diff --git a/plugins/model-providers/deepseek/__init__.py b/plugins/model-providers/deepseek/__init__.py
index 59d738f50fb..f67146df113 100644
--- a/plugins/model-providers/deepseek/__init__.py
+++ b/plugins/model-providers/deepseek/__init__.py
@@ -1,9 +1,88 @@
-"""DeepSeek provider profile."""
+"""DeepSeek provider profile.
+
+DeepSeek's V4 family (and the legacy ``deepseek-reasoner``) defaults to
+thinking-mode ON when ``extra_body.thinking`` is unset.  The API then returns
+``reasoning_content`` and starts enforcing the contract that subsequent turns
+echo it back; combined with how Hermes replays history this lands on the
+notorious HTTP 400 ``reasoning_content must be passed back`` error after the
+first tool call (#15700, #17212, #17825).
+
+This profile overrides :meth:`build_api_kwargs_extras` to mirror the Kimi /
+Moonshot wire shape that DeepSeek's OpenAI-compat endpoint expects:
+
+    {"reasoning_effort": "<low|medium|high|max>",
+     "extra_body": {"thinking": {"type": "enabled" | "disabled"}}}
+
+Non-thinking models (only ``deepseek-chat`` today, which is V3) are left as
+no-ops so we don't perturb the V3 wire format.
+"""
+
+from __future__ import annotations
+
+from typing import Any
 
 from providers import register_provider
 from providers.base import ProviderProfile
 
-deepseek = ProviderProfile(
+
+def _model_supports_thinking(model: str | None) -> bool:
+    """DeepSeek thinking-capable model families.
+
+    Currently covers the V4 family (``deepseek-v4-pro``, ``deepseek-v4-flash``,
+    and any future ``deepseek-v4-*`` variants) and the legacy
+    ``deepseek-reasoner`` (R1).  ``deepseek-chat`` is V3 with no thinking mode.
+    """
+    m = (model or "").strip().lower()
+    if not m:
+        return False
+    if m.startswith("deepseek-v") and not m.startswith("deepseek-v3"):
+        # deepseek-v4-*, deepseek-v5-*, etc. — every V4+ generation has
+        # thinking. v3 explicitly excluded.
+        return True
+    if m == "deepseek-reasoner":
+        return True
+    return False
+
+
+class DeepSeekProfile(ProviderProfile):
+    """DeepSeek — extra_body.thinking + top-level reasoning_effort."""
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        extra_body: dict[str, Any] = {}
+        top_level: dict[str, Any] = {}
+
+        if not _model_supports_thinking(model):
+            # V3 / unknown — leave wire format untouched, current behavior.
+            return extra_body, top_level
+
+        # Determine enabled/disabled.  Default is enabled to match DeepSeek's
+        # API default; the API requires this to be set explicitly to avoid the
+        # reasoning_content echo trap on subsequent turns.
+        enabled = True
+        if isinstance(reasoning_config, dict) and reasoning_config.get("enabled") is False:
+            enabled = False
+
+        extra_body["thinking"] = {"type": "enabled" if enabled else "disabled"}
+
+        if not enabled:
+            return extra_body, top_level
+
+        # Effort mapping.  Pass low/medium/high through; xhigh/max → max.
+        # When no effort is set we omit reasoning_effort so DeepSeek applies
+        # its server default (currently high).
+        if isinstance(reasoning_config, dict):
+            effort = (reasoning_config.get("effort") or "").strip().lower()
+            if effort in ("xhigh", "max"):
+                top_level["reasoning_effort"] = "max"
+            elif effort in ("low", "medium", "high"):
+                top_level["reasoning_effort"] = effort
+
+        return extra_body, top_level
+
+
+deepseek = DeepSeekProfile(
     name="deepseek",
     aliases=("deepseek-chat",),
     env_vars=("DEEPSEEK_API_KEY",),
diff --git a/run_agent.py b/run_agent.py
index c9aa3157170..2b20d48ede2 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9798,7 +9798,6 @@ class AIAgent:
         )
         _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com")
         _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio"
-        _is_deepseek = base_url_host_matches(self.base_url, "api.deepseek.com")
 
         # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
         # sentinel (temperature omitted entirely), a numeric override, or None.
@@ -9910,7 +9909,6 @@ class AIAgent:
             is_kimi=_is_kimi,
             is_tokenhub=_is_tokenhub,
             is_lmstudio=_is_lmstudio,
-            is_deepseek=_is_deepseek,
             is_custom_provider=self.provider == "custom",
             ollama_num_ctx=self._ollama_num_ctx,
             provider_preferences=_prefs or None,
@@ -10370,11 +10368,6 @@ class AIAgent:
         # context compaction).  Don't pass null to the API.
         api_msg.pop("reasoning_content", None)
 
-        # DeepSeek: strip reasoning_content on all assistant messages so the API
-        # doesn't return 400 when the model was invoked with thinking enabled.
-        if base_url_host_matches(self.base_url, "api.deepseek.com"):
-            api_msg.pop("reasoning_content", None)
-
     @staticmethod
     def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
         """Strip Codex Responses API fields from tool_calls for strict providers.
diff --git a/scripts/release.py b/scripts/release.py
index aafa626329e..6084e0754c0 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -73,6 +73,7 @@ AUTHOR_MAP = {
     "teknium@nousresearch.com": "teknium1",
     "piyushvp1@gmail.com": "thelumiereguy",
     "421774554@qq.com": "wuli666",
+    "twebefy@gmail.com": "tw2818",
     "harish.kukreja@gmail.com": "counterposition",
     "korkyzer@gmail.com": "Korkyzer",
     "1046611633@qq.com": "zhengyn0001",
diff --git a/tests/plugins/model_providers/test_deepseek_profile.py b/tests/plugins/model_providers/test_deepseek_profile.py
new file mode 100644
index 00000000000..c53e70070a8
--- /dev/null
+++ b/tests/plugins/model_providers/test_deepseek_profile.py
@@ -0,0 +1,184 @@
+"""Unit tests for the DeepSeek provider profile's thinking-mode wiring.
+
+DeepSeek V4 (and the legacy ``deepseek-reasoner``) expects every request to
+carry an explicit ``extra_body.thinking`` parameter.  Omitting it makes the
+server default to thinking-mode ON, which then enforces the
+``reasoning_content``-must-be-echoed-back contract on subsequent turns and
+breaks the conversation with HTTP 400 (#15700, #17212, #17825).
+
+These tests pin the profile's wire-shape contract so DeepSeek requests stay
+correctly shaped without going live.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture
+def deepseek_profile():
+    """Resolve the registered DeepSeek profile.
+
+    Going through ``providers.get_provider_profile`` keeps the test honest —
+    if someone later replaces the registered class with a plain
+    ``ProviderProfile``, every assertion below collapses.
+    """
+    # ``model_tools`` triggers plugin discovery on import, which is what
+    # registers the DeepSeek profile in the global provider registry.
+    import model_tools  # noqa: F401
+    import providers
+
+    profile = providers.get_provider_profile("deepseek")
+    assert profile is not None, "deepseek provider profile must be registered"
+    return profile
+
+
+class TestDeepSeekThinkingWireShape:
+    """``build_api_kwargs_extras`` produces DeepSeek's exact wire format."""
+
+    def test_v4_pro_default_enables_thinking_without_effort(self, deepseek_profile):
+        """No reasoning_config → thinking enabled, server picks default effort."""
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config=None, model="deepseek-v4-pro"
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    def test_v4_pro_enabled_with_high_effort(self, deepseek_profile):
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": "high"},
+            model="deepseek-v4-pro",
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {"reasoning_effort": "high"}
+
+    @pytest.mark.parametrize("effort", ["low", "medium", "high"])
+    def test_standard_efforts_pass_through(self, deepseek_profile, effort):
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {"reasoning_effort": effort}
+
+    @pytest.mark.parametrize("effort", ["xhigh", "max", "MAX", "  Max  "])
+    def test_xhigh_and_max_normalize_to_max(self, deepseek_profile, effort):
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": effort},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {"reasoning_effort": "max"}
+
+    def test_explicitly_disabled_sends_disabled_marker(self, deepseek_profile):
+        """``reasoning_config.enabled=False`` → ``thinking.type=disabled``.
+
+        The crucial bit is that the parameter is *sent* at all — DeepSeek
+        defaults to thinking-on when ``thinking`` is absent.
+        """
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False}, model="deepseek-v4-pro"
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+        # No effort when disabled — DeepSeek rejects it.
+        assert top_level == {}
+
+    def test_disabled_ignores_effort_field(self, deepseek_profile):
+        """Effort silently dropped when thinking is off."""
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False, "effort": "high"},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {}
+
+    def test_unknown_effort_omits_top_level(self, deepseek_profile):
+        """Garbage effort → omit reasoning_effort so DeepSeek applies its default."""
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": "garbage"},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {}
+
+    def test_empty_effort_omits_top_level(self, deepseek_profile):
+        _, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": ""},
+            model="deepseek-v4-pro",
+        )
+        assert top_level == {}
+
+
+class TestDeepSeekModelGating:
+    """V4 family + ``deepseek-reasoner`` get thinking; V3 stays untouched."""
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "deepseek-v4-pro",
+            "deepseek-v4-flash",
+            "deepseek-v4-future-variant",
+            "deepseek-reasoner",
+            "DEEPSEEK-V4-PRO",  # case-insensitive
+        ],
+    )
+    def test_thinking_capable_models_emit_thinking(self, deepseek_profile, model):
+        extra_body, _ = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config=None, model=model
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "deepseek-chat",         # V3 alias
+            "deepseek-v3-0324",      # explicit V3
+            "deepseek-v3.1",         # V3 minor revisions
+            "",                       # bare/unknown
+            None,                     # missing
+            "deepseek-unknown",      # unrecognized
+        ],
+    )
+    def test_non_thinking_models_emit_nothing(self, deepseek_profile, model):
+        extra_body, top_level = deepseek_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": "high"}, model=model
+        )
+        assert extra_body == {}
+        assert top_level == {}
+
+
+class TestDeepSeekFullKwargsIntegration:
+    """End-to-end: the transport's full kwargs match DeepSeek's live wire format.
+
+    The live test harness in ``tests/run_agent/test_deepseek_v4_thinking_live.py``
+    sends ``{"reasoning_effort": "high", "extra_body": {"thinking": {"type":
+    "enabled"}}}``.  Confirm the transport produces that exact shape when wired
+    through the registered DeepSeek profile.
+    """
+
+    def test_full_kwargs_match_live_wire_shape(self, deepseek_profile):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        kwargs = ChatCompletionsTransport().build_kwargs(
+            model="deepseek-v4-pro",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=deepseek_profile,
+            reasoning_config={"enabled": True, "effort": "high"},
+            base_url="https://api.deepseek.com/v1",
+            provider_name="deepseek",
+        )
+        assert kwargs["model"] == "deepseek-v4-pro"
+        assert kwargs["reasoning_effort"] == "high"
+        assert kwargs["extra_body"] == {"thinking": {"type": "enabled"}}
+
+    def test_v3_chat_full_kwargs_omit_thinking(self, deepseek_profile):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        kwargs = ChatCompletionsTransport().build_kwargs(
+            model="deepseek-chat",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=deepseek_profile,
+            reasoning_config={"enabled": True, "effort": "high"},
+            base_url="https://api.deepseek.com/v1",
+            provider_name="deepseek",
+        )
+        assert "reasoning_effort" not in kwargs
+        assert "extra_body" not in kwargs or "thinking" not in kwargs.get("extra_body", {})

From dc4cde278ba0523c01c2c29988e59a567a19ef22 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 17:04:30 -0700
Subject: [PATCH 092/218] feat(docs): show per-skill pages in the left sidebar
 (#26646)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Individual skill pages (e.g. /docs/user-guide/skills/bundled/productivity/notion)
had no sidebar rendered — the sidebar config only listed the two catalog index
pages. That was an intentional choice from an earlier 'too many entries would
drown product docs' concern, but the effect is that a user landing on any skill
page (via search, share link, or the catalog table) loses navigation entirely
and can't see related skills.

Wire build_sidebar_items() (which was already computed and discarded) back into
the sidebar. Structure:

  Skills
  ├── Bundled skills catalog       (catalog table, was already there)
  ├── Optional skills catalog      (catalog table, was already there)
  ├── Bundled
  │   ├── apple/
  │   │   ├── apple-apple-notes
  │   │   └── ...
  │   └── ... (one collapsed category per skill category)
  └── Optional
      └── ... (same)

Categories are collapsed by default so the top-level Skills entry doesn't
explode visually. Users browsing one skill see siblings in the same category;
the catalogs remain the at-a-glance entry point.

Also includes drift the regen script naturally produces on top of current main:
- creative-comfyui v5.0.0 → v5.1.0 page (author + new ref file)
- devops-kanban-worker SKILL.md updates
- new pages for optional skills that lacked generated docs:
  hyperliquid, finance-stocks, software-development/rest-graphql-debug
- updated optional-skills-catalog row for those

Validation:
- npx docusaurus build (en locale) succeeded — only pre-existing warnings
- inspected built productivity-notion/index.html: sidebar tree present,
  sibling productivity skills (airtable, linear, etc.) all linked
---
 .../docs/reference/optional-skills-catalog.md |   8 +
 .../bundled/creative/creative-comfyui.md      |  10 +-
 .../bundled/devops/devops-kanban-worker.md    |  23 +
 .../blockchain/blockchain-hyperliquid.md      | 228 ++++++++
 .../skills/optional/finance/finance-stocks.md | 112 ++++
 ...software-development-rest-graphql-debug.md | 531 ++++++++++++++++++
 website/scripts/generate-skill-docs.py        |  94 +++-
 website/sidebars.ts                           | 438 +++++++++++++++
 8 files changed, 1411 insertions(+), 33 deletions(-)
 create mode 100644 website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md
 create mode 100644 website/docs/user-guide/skills/optional/finance/finance-stocks.md
 create mode 100644 website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md

diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index 8c4c2f36432..d5839f846d1 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -39,6 +39,7 @@ hermes skills uninstall <skill-name>
 | Skill | Description |
 |-------|-------------|
 | [**evm**](/docs/user-guide/skills/optional/blockchain/blockchain-evm) | Read-only EVM client: wallets, tokens, gas across 8 chains. |
+| [**hyperliquid**](/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid) | Hyperliquid market data, account history, trade review. |
 | [**solana**](/docs/user-guide/skills/optional/blockchain/blockchain-solana) | Query Solana blockchain data with USD pricing — wallet balances, token portfolios with values, transaction details, NFTs, whale detection, and live network stats. Uses Solana RPC + CoinGecko. No API key required. |
 
 ## communication
@@ -88,6 +89,7 @@ hermes skills uninstall <skill-name>
 | [**lbo-model**](/docs/user-guide/skills/optional/finance/finance-lbo-model) | Build leveraged buyout models in Excel — sources & uses, debt schedule, cash sweep, exit multiple, IRR/MOIC sensitivity. Pairs with excel-author. Use for PE screening, sponsor-case valuation, or illustrative LBO in a pitch. |
 | [**merger-model**](/docs/user-guide/skills/optional/finance/finance-merger-model) | Build accretion/dilution (merger) models in Excel — pro-forma P&L, synergies, financing mix, EPS impact. Pairs with excel-author. Use for M&A pitches, board materials, or deal evaluation. |
 | [**pptx-author**](/docs/user-guide/skills/optional/finance/finance-pptx-author) | Build PowerPoint decks headless with python-pptx. Pairs with excel-author for model-backed decks where every number traces to a workbook cell. Use for pitch decks, IC memos, earnings notes. |
+| [**stocks**](/docs/user-guide/skills/optional/finance/finance-stocks) | Stock quotes, history, search, compare, crypto via Yahoo. |
 
 ## health
 
@@ -176,6 +178,12 @@ hermes skills uninstall <skill-name>
 | [**oss-forensics**](/docs/user-guide/skills/optional/security/security-oss-forensics) | Supply chain investigation, evidence recovery, and forensic analysis for GitHub repositories. Covers deleted commit recovery, force-push detection, IOC extraction, multi-source evidence collection, hypothesis formation/validation, and st... |
 | [**sherlock**](/docs/user-guide/skills/optional/security/security-sherlock) | OSINT username search across 400+ social networks. Hunt down social media accounts by username. |
 
+## software-development
+
+| Skill | Description |
+|-------|-------------|
+| [**rest-graphql-debug**](/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug) | Debug REST/GraphQL APIs: status codes, auth, schemas, repro. |
+
 ## web-development
 
 | Skill | Description |
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md b/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md
index 7877e174c7a..38610be8b83 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md
@@ -16,8 +16,8 @@ Generate images, video, and audio with ComfyUI — install, launch, manage nodes
 |---|---|
 | Source | Bundled (installed by default) |
 | Path | `skills/creative/comfyui` |
-| Version | `5.0.0` |
-| Author | ['kshitijk4poor', 'alt-glitch'] |
+| Version | `5.1.0` |
+| Author | ['kshitijk4poor', 'alt-glitch', 'purzbeats'] |
 | License | MIT |
 | Platforms | macos, linux, windows |
 | Tags | `comfyui`, `image-generation`, `stable-diffusion`, `flux`, `sd3`, `wan-video`, `hunyuan-video`, `creative`, `generative-ai`, `video-generation` |
@@ -42,6 +42,12 @@ for workflow execution.
 - `official-cli.md` — every `comfy ...` command, with flags
 - `rest-api.md` — REST + WebSocket endpoints (local + cloud), payload schemas
 - `workflow-format.md` — API-format JSON, common node types, param mapping
+- `template-integrity.md` — converting `comfyui-workflow-templates` from
+  editor format to API format: Reroute bypass, dotted dynamic-input keys
+  (`values.a`, `resize_type.width`), Cloud quirks (302 redirect, 1 concurrent
+  free-tier job, 1080p VRAM ceiling), Discord-compatible ffmpeg stitch.
+  Authored by [@purzbeats](https://github.com/purzbeats). Load this whenever
+  you're starting from an official template.
 
 **Scripts (`scripts/`):**
 
diff --git a/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md b/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md
index dac9de9f174..28d51c17887 100644
--- a/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md
+++ b/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md
@@ -65,6 +65,29 @@ kanban_complete(
 )
 ```
 
+**Coding task that needs human review (review-required):**
+
+For most code-changing tasks, the work isn't truly *done* until a human reviewer has eyes on it. Block instead of complete, with `reason` prefixed `review-required: ` so the dashboard surfaces the row as needing review. Drop the structured metadata (changed files, test counts, diff/PR url) into a comment first, since `kanban_block` only carries the human-readable reason — comments are the durable annotation channel. Reviewer either approves and runs `hermes kanban unblock <id>` (which re-spawns you with the comment thread for any follow-ups) or asks for changes via another comment.
+
+```python
+import json
+
+kanban_comment(
+    body="review-required handoff:\n" + json.dumps({
+        "changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"],
+        "tests_run": 14,
+        "tests_passed": 14,
+        "diff_path": "/path/to/worktree",  # or PR url if pushed
+        "decisions": ["user_id primary, IP fallback for unauthenticated requests"],
+    }, indent=2),
+)
+kanban_block(
+    reason="review-required: rate limiter shipped, 14/14 tests pass — needs eyes on the user_id/IP fallback choice before merging",
+)
+```
+
+Use `kanban_complete` only when the task is genuinely terminal — e.g. a one-line typo fix, a docs change with no functional consequences, or a research task where the artifact IS the writeup itself.
+
 **Research task:**
 ```python
 kanban_complete(
diff --git a/website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md b/website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md
new file mode 100644
index 00000000000..8651bc979f6
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid.md
@@ -0,0 +1,228 @@
+---
+title: "Hyperliquid — Hyperliquid market data, account history, trade review"
+sidebar_label: "Hyperliquid"
+description: "Hyperliquid market data, account history, trade review"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Hyperliquid
+
+Hyperliquid market data, account history, trade review.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/blockchain/hyperliquid` |
+| Path | `optional-skills/blockchain/hyperliquid` |
+| Version | `0.1.0` |
+| Author | Hugo Sequier (Hugo-SEQUIER), Hermes Agent |
+| License | MIT |
+| Platforms | linux, macos, windows |
+| Tags | `Hyperliquid`, `Blockchain`, `Crypto`, `Trading`, `Perpetuals`, `Spot`, `DeFi` |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Hyperliquid Skill
+
+Query Hyperliquid market and account data through the public `/info` endpoint.
+Read-only — no API key, no signing, no order placement.
+
+12 commands: `dexs`, `markets`, `spots`, `candles`, `funding`, `l2`, `state`,
+`spot-balances`, `fills`, `orders`, `review`, `export`. Stdlib only
+(`urllib`, `json`, `argparse`).
+
+---
+
+## When to Use
+
+- User asks for Hyperliquid perp or spot market data, candles, funding, or L2 book
+- User wants to inspect a wallet's perp positions, spot balances, fills, or orders
+- User wants a post-trade review combining recent fills with market context
+- User wants to inspect builder-deployed perp dexs or HIP-3 markets
+- User wants a normalized JSON export of candles + funding for backtesting prep
+
+---
+
+## Prerequisites
+
+Stdlib only — no external packages, no API key.
+
+The script reads `~/.hermes/.env` for two optional defaults:
+
+- `HYPERLIQUID_API_URL` — defaults to `https://api.hyperliquid.xyz`. Set to
+  `https://api.hyperliquid-testnet.xyz` for testnet.
+- `HYPERLIQUID_USER_ADDRESS` — default address for `state`, `spot-balances`,
+  `fills`, `orders`, and `review`. If unset, pass the address as the first
+  positional argument.
+
+A project `.env` in the current working directory is honored as a dev fallback.
+
+Helper script: `~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py`
+
+---
+
+## How to Run
+
+Invoke through the `terminal` tool:
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py <command> [args]
+```
+
+Add `--json` to any command for machine-readable output.
+
+---
+
+## Quick Reference
+
+```bash
+hyperliquid_client.py dexs
+hyperliquid_client.py markets [--dex DEX] [--limit N] [--sort volume|oi|funding_abs|change_abs|name]
+hyperliquid_client.py spots [--limit N]
+hyperliquid_client.py candles <coin> [--interval 1h] [--hours 24] [--limit N]
+hyperliquid_client.py funding <coin> [--hours 72] [--limit N]
+hyperliquid_client.py l2 <coin> [--levels N]
+hyperliquid_client.py state [address] [--dex DEX]
+hyperliquid_client.py spot-balances [address] [--limit N]
+hyperliquid_client.py fills [address] [--hours N] [--limit N] [--aggregate-by-time]
+hyperliquid_client.py orders [address] [--limit N]
+hyperliquid_client.py review [address] [--coin COIN] [--hours N] [--fills N]
+hyperliquid_client.py export <coin> [--interval 1h] [--hours N] [--output PATH]
+```
+
+For `state`, `spot-balances`, `fills`, `orders`, and `review`, the address is
+optional when `HYPERLIQUID_USER_ADDRESS` is set in `~/.hermes/.env`.
+
+---
+
+## Procedure
+
+### 1. Discover DEXs and Markets
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py dexs
+
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  markets --limit 15 --sort volume
+
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  spots --limit 15
+```
+
+- `--dex` only applies to perp endpoints; omit for the first perp dex.
+- Spot pairs may show as `PURR/USDC` or aliases like `@107`.
+- HIP-3 markets prefix the coin with the dex, e.g. `mydex:BTC`.
+
+### 2. Pull Historical Market Data
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  candles BTC --interval 1h --hours 72 --limit 48
+
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  funding BTC --hours 168 --limit 30
+```
+
+Time-range endpoints paginate. For larger windows, repeat with a later
+`startTime` or use `export` (below).
+
+### 3. Inspect Live Order Book
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  l2 BTC --levels 10
+```
+
+Use when asked about book depth, near-term liquidity, or potential market
+impact of a large order.
+
+### 4. Review an Account
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  state 0xabc...
+
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  spot-balances
+```
+
+`state` returns perp positions; `spot-balances` returns spot inventory.
+Use these for "how are my positions?", "what am I holding?", "how much is
+withdrawable?".
+
+### 5. Review Fills and Orders
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  fills 0xabc... --hours 72 --limit 25
+
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  orders --limit 25
+```
+
+### 6. Generate a Trade Review
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  review 0xabc... --hours 72 --fills 50
+
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  review --coin BTC --hours 168
+```
+
+Reports realized PnL, fees, win/loss counts, coin breakdowns, market trend
+and average funding for each traded perp, plus heuristics (fee drag,
+concentration, counter-trend losses).
+
+For deeper post-trade analysis: start with `review` to find problem coins
+or windows → pull `fills` and `orders` for that period → pull `candles`
+and `funding` for each traded coin → judge decision quality separately
+from outcome quality.
+
+### 7. Export a Reusable Dataset
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  export BTC --interval 1h --hours 168 --output ./btc-1h-7d.json
+
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  export BTC --interval 15m --hours 72 --end-time-ms 1760000000000
+```
+
+Output JSON contains: schema version, source metadata, exact time window,
+normalized candle rows, normalized funding rows, summary stats. Use
+`--end-time-ms` for reproducible windows.
+
+---
+
+## Pitfalls
+
+- Public info endpoints are rate-limited. Large historical queries may
+  return capped windows; iterate with later `startTime` values.
+- `fills --hours ...` uses `userFillsByTime`, which only exposes a
+  recent rolling window — not full archive history.
+- `historicalOrders` returns recent orders only; not a full export.
+- The `review` command is heuristic. It cannot reconstruct intent,
+  order placement quality, or true slippage from fills alone.
+- The `export` command writes a normalized dataset, not a backtest
+  engine. You still need your own slippage/fill model.
+- Spot aliases like `@107` are valid identifiers even when the UI shows
+  a friendlier name.
+- `l2` is a point-in-time snapshot, not a time series.
+
+---
+
+## Verification
+
+```bash
+python3 ~/.hermes/skills/blockchain/hyperliquid/scripts/hyperliquid_client.py \
+  markets --limit 5
+```
+
+Should print the top Hyperliquid perp markets by 24h notional volume.
diff --git a/website/docs/user-guide/skills/optional/finance/finance-stocks.md b/website/docs/user-guide/skills/optional/finance/finance-stocks.md
new file mode 100644
index 00000000000..7c43dea3065
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/finance/finance-stocks.md
@@ -0,0 +1,112 @@
+---
+title: "Stocks — Stock quotes, history, search, compare, crypto via Yahoo"
+sidebar_label: "Stocks"
+description: "Stock quotes, history, search, compare, crypto via Yahoo"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Stocks
+
+Stock quotes, history, search, compare, crypto via Yahoo.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/finance/stocks` |
+| Path | `optional-skills/finance/stocks` |
+| Version | `0.1.0` |
+| Author | Mibay (Mibayy), Hermes Agent |
+| License | MIT |
+| Platforms | linux, macos, windows |
+| Tags | `Stocks`, `Finance`, `Market`, `Crypto`, `Investing` |
+| Related skills | [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`comps-analysis`](/docs/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/docs/user-guide/skills/optional/finance/finance-lbo-model) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Stocks Skill
+
+Read-only market data via Yahoo Finance. Five commands: `quote`, `search`,
+`history`, `compare`, `crypto`. Python stdlib only — no API key, no pip
+installs. Yahoo's endpoint is unofficial and may rate-limit or change.
+
+## When to Use
+
+- User asks for a current stock price (AAPL, TSLA, MSFT, ...)
+- User wants to look up a ticker by company name
+- User wants OHLCV history or performance over a date range
+- User wants to compare several tickers side by side
+- User asks for a crypto price (BTC, ETH, SOL, ...)
+
+## Prerequisites
+
+Python 3.8+ stdlib only. Optional: set `ALPHA_VANTAGE_KEY` to enrich
+`market_cap`, `pe_ratio`, and 52-week levels when Yahoo's crumb-protected
+fields come back null. Free key: https://www.alphavantage.co/support/#api-key
+
+## How to Run
+
+Invoke through the `terminal` tool. Once installed:
+
+```
+SCRIPT=~/.hermes/skills/finance/stocks/scripts/stocks_client.py
+python3 $SCRIPT quote AAPL
+```
+
+All output is JSON on stdout — pipe through `jq` if you want to slice it.
+
+## Quick Reference
+
+```
+python3 $SCRIPT quote AAPL
+python3 $SCRIPT quote AAPL MSFT GOOGL TSLA
+python3 $SCRIPT search "Tesla"
+python3 $SCRIPT history NVDA --range 6mo
+python3 $SCRIPT compare AAPL MSFT GOOGL
+python3 $SCRIPT crypto BTC ETH SOL
+```
+
+## Commands
+
+### `quote SYMBOL [SYMBOL2 ...]`
+
+Current price, change, change%, volume, 52-week high/low.
+
+### `search QUERY`
+
+Find tickers by company name. Returns top 5: symbol, name, exchange, type.
+
+### `history SYMBOL [--range RANGE]`
+
+Daily OHLCV plus stats (min, max, avg, total return %). Ranges: `1mo`,
+`3mo`, `6mo`, `1y`, `5y`. Default: `1mo`.
+
+### `compare SYMBOL1 SYMBOL2 [...]`
+
+Side-by-side: price, change%, 52-week performance.
+
+### `crypto SYMBOL [SYMBOL2 ...]`
+
+Crypto prices. Pass `BTC` (the script appends `-USD` automatically).
+
+## Pitfalls
+
+- Yahoo Finance's API is unofficial. Endpoints can change or rate-limit
+  without notice — if requests start failing, that's why.
+- `market_cap` and `pe_ratio` may return null on `quote` when Yahoo's
+  crumb session isn't established. Set `ALPHA_VANTAGE_KEY` to backfill.
+- Add a small delay between bulk requests to avoid rate-limiting.
+- This is read-only — no order placement, no account integration.
+
+## Verification
+
+```
+python3 ~/.hermes/skills/finance/stocks/scripts/stocks_client.py quote AAPL
+```
+
+Returns a JSON object with `symbol: "AAPL"` and a numeric `price` field.
diff --git a/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md b/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md
new file mode 100644
index 00000000000..0698d855f5f
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md
@@ -0,0 +1,531 @@
+---
+title: "Rest Graphql Debug — Debug REST/GraphQL APIs: status codes, auth, schemas, repro"
+sidebar_label: "Rest Graphql Debug"
+description: "Debug REST/GraphQL APIs: status codes, auth, schemas, repro"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Rest Graphql Debug
+
+Debug REST/GraphQL APIs: status codes, auth, schemas, repro.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/software-development/rest-graphql-debug` |
+| Path | `optional-skills/software-development/rest-graphql-debug` |
+| Version | `1.2.0` |
+| Author | eren-karakus0 |
+| License | MIT |
+| Tags | `api`, `rest`, `graphql`, `http`, `debugging`, `testing`, `curl`, `integration` |
+| Related skills | [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# API Testing & Debugging
+
+Drive REST and GraphQL diagnosis through Hermes tools — `terminal` for `curl`, `execute_code` for Python `requests`, `web_extract` for vendor docs. Isolate the failing layer before guessing at the fix.
+
+## When to Use
+
+- API returns unexpected status or body
+- Auth fails (401/403 after token refresh, OAuth, API key)
+- Works in Postman but fails in code
+- Webhook / callback integration debugging
+- Building or reviewing API integration tests
+- Rate limiting or pagination issues
+
+Skip for UI rendering, DB query tuning, or DNS/firewall infra (escalate).
+
+## Core Principle
+
+**Isolate the layer, then fix.** A 200 OK can hide broken data. A 500 can mask a one-character auth typo. Walk the chain in order; never skip a step.
+
+```
+1. Connectivity   → can we reach the host at all?
+1.5 Timeouts      → connect-slow vs read-slow?
+2. TLS/SSL        → cert valid and trusted?
+3. Auth           → credentials correct and unexpired?
+4. Request format → payload shape match server expectations?
+5. Response parse → does our code accept what came back?
+6. Semantics      → does the data mean what we assume?
+```
+
+## 5-Minute Quickstart
+
+### REST via terminal
+
+```python
+# Verbose request/response exchange
+terminal('curl -v https://api.example.com/users/1')
+
+# POST with JSON
+terminal("""curl -X POST https://api.example.com/users \\
+  -H 'Content-Type: application/json' \\
+  -H "Authorization: Bearer $TOKEN" \\
+  -d '{"name":"test","email":"test@example.com"}'""")
+
+# Headers only
+terminal('curl -sI https://api.example.com/health')
+
+# Pretty-print JSON
+terminal('curl -s https://api.example.com/users | python3 -m json.tool')
+```
+
+### GraphQL via terminal
+
+```python
+terminal("""curl -X POST https://api.example.com/graphql \\
+  -H 'Content-Type: application/json' \\
+  -H "Authorization: Bearer $TOKEN" \\
+  -d '{"query":"{ user(id: 1) { name email } }"}'""")
+```
+
+**GraphQL gotcha:** servers often return HTTP 200 even when the query failed. Always inspect the `errors` field regardless of status code:
+
+```python
+execute_code('''
+import os, requests
+resp = requests.post(
+    "https://api.example.com/graphql",
+    json={"query": "{ user(id: 1) { name email } }"},
+    headers={"Authorization": f"Bearer {os.environ['TOKEN']}"},
+    timeout=10,
+)
+data = resp.json()
+if data.get("errors"):
+    for err in data["errors"]:
+        print(f"GraphQL error: {err['message']} (path: {err.get('path')})")
+print(data.get("data"))
+''')
+```
+
+### Python (requests) via execute_code
+
+```python
+execute_code('''
+import requests
+resp = requests.get(
+    "https://api.example.com/users/1",
+    headers={"Authorization": "Bearer <TOKEN>"},
+    timeout=(3.05, 30),  # (connect, read)
+)
+print(resp.status_code, dict(resp.headers))
+print(resp.text[:500])
+''')
+```
+
+## Layered Debug Flow
+
+### Step 1 — Connectivity
+
+```python
+terminal('nslookup api.example.com')
+terminal('curl -v --connect-timeout 5 https://api.example.com/health')
+```
+
+Failures: DNS not resolving, firewall, VPN required, proxy missing.
+
+### Step 1.5 — Timeouts
+
+Distinguish *can't reach* from *reaches but slow*:
+
+```python
+terminal('''curl -w "dns:%{time_namelookup}s connect:%{time_connect}s tls:%{time_appconnect}s ttfb:%{time_starttransfer}s total:%{time_total}s\\n" \\
+  -o /dev/null -s https://api.example.com/endpoint''')
+```
+
+In Python, always pass a tuple timeout — `requests` has no default and will hang forever:
+
+```python
+execute_code('''
+import requests
+from requests.exceptions import ConnectTimeout, ReadTimeout
+try:
+    requests.get(url, timeout=(3.05, 30))
+except ConnectTimeout:
+    print("Cannot reach host — DNS, firewall, VPN")
+except ReadTimeout:
+    print("Connected but server is slow")
+''')
+```
+
+Diagnosis: high `time_connect` is network/firewall; high `time_starttransfer` with low `time_connect` is a slow server.
+
+### Step 2 — TLS/SSL
+
+```python
+terminal('curl -vI https://api.example.com 2>&1 | grep -E "SSL|subject|expire|issuer"')
+```
+
+Failures: expired cert, self-signed, hostname mismatch, missing CA bundle. Use `-k` only for ad-hoc debug, never in code.
+
+### Step 3 — Authentication
+
+```python
+# Token validity check
+terminal('curl -s -o /dev/null -w "%{http_code}\\n" -H "Authorization: Bearer $TOKEN" https://api.example.com/me')
+
+# Decode JWT exp claim — handles base64url padding correctly
+execute_code('''
+import json, base64, os
+tok = os.environ["TOKEN"]
+payload = tok.split(".")[1]
+payload += "=" * (-len(payload) % 4)
+print(json.dumps(json.loads(base64.urlsafe_b64decode(payload)), indent=2))
+''')
+```
+
+Checklist:
+- Token expired? (`exp` claim in JWT)
+- Right scheme? Bearer vs Basic vs Token vs `X-Api-Key`
+- Right environment? Staging key on prod is a classic
+- API key in header vs query param (`?api_key=…`)?
+
+### Step 4 — Request Format
+
+```python
+terminal("""curl -v -X POST https://api.example.com/endpoint \\
+  -H 'Content-Type: application/json' \\
+  -d '{"key":"value"}' 2>&1""")
+```
+
+**Content-Type / body mismatch — the silent 415/400:**
+
+```python
+# WRONG — data= sends form-encoded, header lies
+requests.post(url, data='{"k":"v"}', headers={"Content-Type": "application/json"})
+
+# RIGHT — json= auto-sets header AND serializes
+requests.post(url, json={"k": "v"})
+
+# WRONG — Accept says XML, code calls .json()
+requests.get(url, headers={"Accept": "text/xml"})
+
+# RIGHT — let requests build multipart with boundary
+requests.post(url, files={"file": open("doc.pdf", "rb")})
+```
+
+Common: form-encoded vs JSON, missing required fields, wrong HTTP method, unencoded query params.
+
+### Step 5 — Response Parsing
+
+Always inspect content-type before calling `.json()`:
+
+```python
+execute_code('''
+import requests
+resp = requests.post(url, json=payload, timeout=10)
+print(f"status={resp.status_code}")
+print(f"headers={dict(resp.headers)}")
+ct = resp.headers.get("Content-Type", "")
+if "application/json" in ct:
+    print(resp.json())
+else:
+    print(f"unexpected content-type {ct!r}, body={resp.text[:500]!r}")
+''')
+```
+
+Failures: HTML error page where JSON expected, empty body, wrong charset.
+
+### Step 6 — Semantic Validation
+
+Parsed cleanly — but is the data *correct*?
+
+- Does `"status": "active"` mean what your code thinks?
+- ID in response matches the one requested?
+- Timestamps in expected timezone?
+- Pagination returning all results, or just page 1?
+
+## HTTP Status Playbook
+
+### 401 Unauthorized — credentials missing or invalid
+
+1. `Authorization` header actually present? (`curl -v` to confirm)
+2. Token correct and unexpired?
+3. Right auth scheme? (`Bearer` vs `Basic` vs `Token`)
+4. Some APIs use query param (`?api_key=…`) instead of header.
+
+### 403 Forbidden — authenticated but not authorized
+
+1. Token has the required scopes/permissions?
+2. Resource owned by a different account?
+3. IP allowlist blocking you?
+4. CORS in browser? (check `Access-Control-Allow-Origin`)
+
+### 404 Not Found — resource doesn't exist or URL is wrong
+
+1. Path correct? (trailing slash, typo, version prefix)
+2. Resource ID exists?
+3. Right API version (`/v1/` vs `/v2/`)?
+4. Right base URL (staging vs prod)?
+
+### 409 Conflict — state collision
+
+1. Resource already exists (duplicate create)?
+2. Stale `ETag` / `If-Match`?
+3. Concurrent modification by another process?
+
+### 422 Unprocessable Entity — valid JSON, invalid data
+
+The error body usually names the bad fields. Check:
+- Field types (string vs int, date format)
+- Required vs optional
+- Enum values inside the allowed set
+
+### 429 Too Many Requests — rate limited
+
+Check `Retry-After` and `X-RateLimit-*` headers. Exponential backoff:
+
+```python
+execute_code('''
+import time, requests
+
+def with_backoff(method, url, **kwargs):
+    for attempt in range(5):
+        resp = requests.request(method, url, **kwargs)
+        if resp.status_code != 429:
+            return resp
+        wait = int(resp.headers.get("Retry-After", 2 ** attempt))
+        time.sleep(wait)
+    return resp
+''')
+```
+
+### 5xx — server-side, usually not your fault
+
+- **500** — server bug. Capture correlation ID, file with provider.
+- **502** — upstream down. Backoff + retry.
+- **503** — overloaded / maintenance. Check status page.
+- **504** — upstream timeout. Reduce payload or raise timeout.
+
+For all 5xx: backoff with jitter, alert on persistence.
+
+## Pagination & Idempotency
+
+**Pagination.** Verify you're getting *all* results. Look for `next_cursor`, `next_page`, `total_count`. Two patterns:
+- Offset (`?limit=100&offset=200`) — simple, can skip items if data shifts.
+- Cursor (`?cursor=abc123`) — preferred for live or large datasets.
+
+**Idempotency.** For non-idempotent operations (POST), send `Idempotency-Key: <uuid>` so retries don't double-charge / double-create. Mandatory for payments and orders.
+
+## Contract Validation
+
+Catch schema drift before it hits production:
+
+```python
+execute_code('''
+import requests
+
+def validate_user(data: dict) -> list[str]:
+    errors = []
+    required = {"id": int, "email": str, "created_at": str}
+    for field, expected in required.items():
+        if field not in data:
+            errors.append(f"missing field: {field}")
+        elif not isinstance(data[field], expected):
+            errors.append(f"{field}: want {expected.__name__}, got {type(data[field]).__name__}")
+    return errors
+
+resp = requests.get(f"{BASE}/users/1", headers=HEADERS, timeout=10)
+issues = validate_user(resp.json())
+if issues:
+    print(f"contract violations: {issues}")
+''')
+```
+
+Run after API upgrades, when integrating new third parties, or in CI smoke tests.
+
+## Correlation IDs
+
+Always capture the provider's request ID — fastest path to vendor support:
+
+```python
+execute_code('''
+import requests
+resp = requests.post(url, json=payload, headers=headers, timeout=10)
+request_id = (
+    resp.headers.get("X-Request-Id")
+    or resp.headers.get("X-Trace-Id")
+    or resp.headers.get("CF-Ray")  # Cloudflare
+)
+if resp.status_code >= 400:
+    print(f"failed status={resp.status_code} req_id={request_id} ts={resp.headers.get('Date')}")
+''')
+```
+
+**Vendor bug-report template:**
+
+```
+Endpoint:    POST /api/v1/orders
+Request ID:  req_abc123xyz
+Timestamp:   2026-03-17T14:30:00Z
+Status:      500
+Expected:    201 with order object
+Actual:      500 {"error":"internal server error"}
+Repro:       curl -X POST … (auth: <REDACTED>)
+```
+
+## Regression Test Template
+
+Drop this into `tests/` and run via `terminal('pytest tests/test_api_smoke.py -v')`:
+
+```python
+import os, requests, pytest
+
+BASE_URL = os.environ.get("API_BASE_URL", "https://api.example.com")
+TOKEN    = os.environ.get("API_TOKEN", "")
+HEADERS  = {"Authorization": f"Bearer {TOKEN}"}
+
+class TestAPISmoke:
+    def test_health(self):
+        resp = requests.get(f"{BASE_URL}/health", timeout=5)
+        assert resp.status_code == 200
+
+    def test_list_users_returns_array(self):
+        resp = requests.get(f"{BASE_URL}/users", headers=HEADERS, timeout=10)
+        assert resp.status_code == 200
+        data = resp.json()
+        assert isinstance(data.get("data", data), list)
+
+    def test_get_user_required_fields(self):
+        resp = requests.get(f"{BASE_URL}/users/1", headers=HEADERS, timeout=10)
+        assert resp.status_code in (200, 404)
+        if resp.status_code == 200:
+            user = resp.json()
+            assert "id" in user and "email" in user
+
+    def test_invalid_auth_returns_401(self):
+        resp = requests.get(
+            f"{BASE_URL}/users",
+            headers={"Authorization": "Bearer invalid-token"},
+            timeout=10,
+        )
+        assert resp.status_code == 401
+```
+
+## Security
+
+### Token handling
+- Never log full tokens. Redact: `Bearer <REDACTED>`.
+- Never hardcode tokens in scripts. Read from env (`os.environ["API_TOKEN"]`) or `~/.hermes/.env`.
+- Rotate immediately if a token surfaces in logs, error messages, or git history.
+
+### Safe logging
+
+```python
+def redact_auth(headers: dict) -> dict:
+    sensitive = {"authorization", "x-api-key", "cookie", "set-cookie"}
+    return {k: ("<REDACTED>" if k.lower() in sensitive else v) for k, v in headers.items()}
+```
+
+### Leak checklist
+
+- [ ] **Credentials in URLs.** API keys in query strings end up in server logs, browser history, referrer headers — use headers.
+- [ ] **PII in error responses.** `404 on /users/123` shouldn't reveal whether the user exists (enumeration).
+- [ ] **Stack traces in prod.** 500s shouldn't leak file paths, framework versions.
+- [ ] **Internal hostnames/IPs.** `10.x.x.x`, `internal-api.corp.local` in error bodies.
+- [ ] **Tokens echoed back.** Some APIs include the auth token in error details. Verify they don't.
+- [ ] **Verbose `Server` / `X-Powered-By`.** Stack-info leaks. Note for security review.
+
+## Hermes Tool Patterns
+
+### terminal — for curl, dig, openssl
+
+```python
+terminal('curl -sI https://api.example.com')
+terminal('openssl s_client -connect api.example.com:443 -servername api.example.com </dev/null 2>/dev/null | openssl x509 -noout -dates')
+```
+
+### execute_code — for multi-step Python flows
+
+When debugging spans auth → fetch → paginate → validate, use `execute_code`. Variables persist for the script, results print to stdout, no risk of token spam in your context:
+
+```python
+execute_code('''
+import os, requests
+
+token = os.environ["API_TOKEN"]
+base  = "https://api.example.com"
+H     = {"Authorization": f"Bearer {token}"}
+
+# 1. auth
+me = requests.get(f"{base}/me", headers=H, timeout=10)
+print(f"auth {me.status_code}")
+
+# 2. paginate
+all_users, cursor = [], None
+while True:
+    params = {"cursor": cursor} if cursor else {}
+    r = requests.get(f"{base}/users", headers=H, params=params, timeout=10)
+    body = r.json()
+    all_users.extend(body["data"])
+    cursor = body.get("next_cursor")
+    if not cursor:
+        break
+print(f"users={len(all_users)}")
+''')
+```
+
+### web_extract — for vendor API docs
+
+Pull the spec for the endpoint you're debugging instead of guessing:
+
+```python
+web_extract(urls=["https://docs.example.com/api/v1/users"])
+```
+
+### delegate_task — for full CRUD test sweeps
+
+```python
+delegate_task(
+    goal="Test all CRUD endpoints for /api/v1/users",
+    context="""
+Follow the rest-graphql-debug skill (optional-skills/software-development/rest-graphql-debug).
+Base URL: https://api.example.com
+Auth: Bearer token from API_TOKEN env var.
+
+For each verb (POST, GET, PATCH, DELETE):
+  - happy path: assert status + response schema
+  - error cases: 400, 404, 422
+  - log a repro curl for any failure (redact tokens)
+
+Output: pass/fail per endpoint + correlation IDs for failures.
+""",
+    toolsets=["terminal", "file"],
+)
+```
+
+## Output Format
+
+When reporting findings:
+
+```
+## Finding
+Endpoint: POST /api/v1/users
+Status:   422 Unprocessable Entity
+Req ID:   req_abc123xyz
+
+## Repro
+curl -X POST https://api.example.com/api/v1/users \
+  -H 'Content-Type: application/json' \
+  -H 'Authorization: Bearer <REDACTED>' \
+  -d '{"name":"test"}'
+
+## Root Cause
+Missing required field `email`. Server validation rejects before processing.
+
+## Fix
+-d '{"name":"test","email":"test@example.com"}'
+```
+
+## Related
+
+- `systematic-debugging` — once the failing API layer is isolated, root-cause your code
+- `test-driven-development` — write the regression test before shipping the fix
diff --git a/website/scripts/generate-skill-docs.py b/website/scripts/generate-skill-docs.py
index d55c6e55c31..2a0694a61c8 100755
--- a/website/scripts/generate-skill-docs.py
+++ b/website/scripts/generate-skill-docs.py
@@ -622,38 +622,70 @@ def build_sidebar_items(entries: list[tuple[dict[str, Any], dict[str, Any]]]) ->
     }
 
 
-def write_sidebar(entries):
-    # The per-skill pages (`build_sidebar_items(entries)`) are still generated
-    # as standalone docs under `website/docs/user-guide/skills/{bundled,optional}/`
-    # and reachable via the catalog pages in Reference — but we intentionally
-    # do NOT explode them into the left sidebar. Two hundred-plus skill entries
-    # drown the actual product docs and make the site feel overwhelming to
-    # first-time visitors.
-    #
-    # Sidebar now shows:
-    #   Skills
-    #   ├── Bundled catalog →    (link to reference/skills-catalog)
-    #   └── Optional catalog →   (link to reference/optional-skills-catalog)
-    #
-    # The catalog pages are auto-regenerated tables with a link to every skill.
-    # Individual skill pages (including the two formerly hand-written guides,
-    # godmode and google-workspace) are still reachable at their URLs and are
-    # linked from the catalog tables and from the Skills overview page — they
-    # just aren't promoted in the left sidebar, because there's no principled
-    # rule for which skills would get promoted and which wouldn't.
-    _ = build_sidebar_items(entries)  # still called for any side effects / validation
+def _render_sidebar_item(item: Any, indent: int) -> list[str]:
+    """Render one sidebar item (string doc id, or category dict) as ts lines."""
+    pad = " " * indent
+    lines: list[str] = []
+    if isinstance(item, str):
+        lines.append(f"{pad}'{item}',")
+        return lines
+    # category dict
+    lines.append(f"{pad}{{")
+    lines.append(f"{pad}  type: 'category',")
+    lines.append(f"{pad}  label: '{item['label']}',")
+    if item.get("collapsed", True):
+        lines.append(f"{pad}  collapsed: true,")
+    lines.append(f"{pad}  items: [")
+    for child in item.get("items", []):
+        lines.extend(_render_sidebar_item(child, indent + 4))
+    lines.append(f"{pad}  ],")
+    lines.append(f"{pad}}},")
+    return lines
 
-    skills_subtree = (
-        "        {\n"
-        "          type: 'category',\n"
-        "          label: 'Skills',\n"
-        "          collapsed: true,\n"
-        "          items: [\n"
-        "            'reference/skills-catalog',\n"
-        "            'reference/optional-skills-catalog',\n"
-        "          ],\n"
-        "        },\n"
-    )
+
+def write_sidebar(entries):
+    # Sidebar layout:
+    #   Skills
+    #   ├── reference/skills-catalog
+    #   ├── reference/optional-skills-catalog
+    #   ├── Bundled
+    #   │   ├── apple/
+    #   │   │   ├── apple-apple-notes
+    #   │   │   └── ...
+    #   │   └── ...
+    #   └── Optional
+    #       └── ...
+    #
+    # The two catalog index pages stay at the top of the Skills section so
+    # the at-a-glance table view is one click away, and the per-category
+    # subtrees give individual skill pages real sidebar navigation when
+    # users land on them directly.
+    tree = build_sidebar_items(entries)
+
+    skills_block: list[dict[str, Any]] = [
+        {
+            "label": "Bundled",
+            "collapsed": True,
+            "items": tree["bundled_categories"],
+        },
+        {
+            "label": "Optional",
+            "collapsed": True,
+            "items": tree["optional_categories"],
+        },
+    ]
+    skills_items: list[Any] = [
+        "reference/skills-catalog",
+        "reference/optional-skills-catalog",
+        *skills_block,
+    ]
+
+    skills_top = {
+        "label": "Skills",
+        "collapsed": True,
+        "items": skills_items,
+    }
+    skills_subtree = "\n".join(_render_sidebar_item(skills_top, 8)) + "\n"
 
     sidebar_path = REPO / "website" / "sidebars.ts"
     text = sidebar_path.read_text(encoding="utf-8")
diff --git a/website/sidebars.ts b/website/sidebars.ts
index f0a0658c3bf..fe7b741eb2e 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -113,6 +113,444 @@ const sidebars: SidebarsConfig = {
           items: [
             'reference/skills-catalog',
             'reference/optional-skills-catalog',
+            {
+              type: 'category',
+              label: 'Bundled',
+              collapsed: true,
+              items: [
+                {
+                  type: 'category',
+                  label: 'apple',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/apple/apple-apple-notes',
+                    'user-guide/skills/bundled/apple/apple-apple-reminders',
+                    'user-guide/skills/bundled/apple/apple-findmy',
+                    'user-guide/skills/bundled/apple/apple-imessage',
+                    'user-guide/skills/bundled/apple/apple-macos-computer-use',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'autonomous-ai-agents',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code',
+                    'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex',
+                    'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent',
+                    'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'creative',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/creative/creative-architecture-diagram',
+                    'user-guide/skills/bundled/creative/creative-ascii-art',
+                    'user-guide/skills/bundled/creative/creative-ascii-video',
+                    'user-guide/skills/bundled/creative/creative-baoyu-comic',
+                    'user-guide/skills/bundled/creative/creative-baoyu-infographic',
+                    'user-guide/skills/bundled/creative/creative-claude-design',
+                    'user-guide/skills/bundled/creative/creative-comfyui',
+                    'user-guide/skills/bundled/creative/creative-creative-ideation',
+                    'user-guide/skills/bundled/creative/creative-design-md',
+                    'user-guide/skills/bundled/creative/creative-excalidraw',
+                    'user-guide/skills/bundled/creative/creative-humanizer',
+                    'user-guide/skills/bundled/creative/creative-manim-video',
+                    'user-guide/skills/bundled/creative/creative-p5js',
+                    'user-guide/skills/bundled/creative/creative-pixel-art',
+                    'user-guide/skills/bundled/creative/creative-popular-web-designs',
+                    'user-guide/skills/bundled/creative/creative-pretext',
+                    'user-guide/skills/bundled/creative/creative-sketch',
+                    'user-guide/skills/bundled/creative/creative-songwriting-and-ai-music',
+                    'user-guide/skills/bundled/creative/creative-touchdesigner-mcp',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'data-science',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'devops',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/devops/devops-kanban-orchestrator',
+                    'user-guide/skills/bundled/devops/devops-kanban-worker',
+                    'user-guide/skills/bundled/devops/devops-webhook-subscriptions',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'dogfood',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/dogfood/dogfood-dogfood',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'email',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/email/email-himalaya',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'gaming',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server',
+                    'user-guide/skills/bundled/gaming/gaming-pokemon-player',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'github',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/github/github-codebase-inspection',
+                    'user-guide/skills/bundled/github/github-github-auth',
+                    'user-guide/skills/bundled/github/github-github-code-review',
+                    'user-guide/skills/bundled/github/github-github-issues',
+                    'user-guide/skills/bundled/github/github-github-pr-workflow',
+                    'user-guide/skills/bundled/github/github-github-repo-management',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'mcp',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/mcp/mcp-native-mcp',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'media',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/media/media-gif-search',
+                    'user-guide/skills/bundled/media/media-heartmula',
+                    'user-guide/skills/bundled/media/media-songsee',
+                    'user-guide/skills/bundled/media/media-spotify',
+                    'user-guide/skills/bundled/media/media-youtube-content',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'mlops',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/mlops/mlops-models-audiocraft',
+                    'user-guide/skills/bundled/mlops/mlops-research-dspy',
+                    'user-guide/skills/bundled/mlops/mlops-huggingface-hub',
+                    'user-guide/skills/bundled/mlops/mlops-inference-llama-cpp',
+                    'user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness',
+                    'user-guide/skills/bundled/mlops/mlops-inference-obliteratus',
+                    'user-guide/skills/bundled/mlops/mlops-models-segment-anything',
+                    'user-guide/skills/bundled/mlops/mlops-inference-vllm',
+                    'user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'note-taking',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/note-taking/note-taking-obsidian',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'productivity',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/productivity/productivity-airtable',
+                    'user-guide/skills/bundled/productivity/productivity-google-workspace',
+                    'user-guide/skills/bundled/productivity/productivity-linear',
+                    'user-guide/skills/bundled/productivity/productivity-maps',
+                    'user-guide/skills/bundled/productivity/productivity-nano-pdf',
+                    'user-guide/skills/bundled/productivity/productivity-notion',
+                    'user-guide/skills/bundled/productivity/productivity-ocr-and-documents',
+                    'user-guide/skills/bundled/productivity/productivity-powerpoint',
+                    'user-guide/skills/bundled/productivity/productivity-teams-meeting-pipeline',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'red-teaming',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/red-teaming/red-teaming-godmode',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'research',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/research/research-arxiv',
+                    'user-guide/skills/bundled/research/research-blogwatcher',
+                    'user-guide/skills/bundled/research/research-llm-wiki',
+                    'user-guide/skills/bundled/research/research-polymarket',
+                    'user-guide/skills/bundled/research/research-research-paper-writing',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'smart-home',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/smart-home/smart-home-openhue',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'social-media',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/social-media/social-media-xurl',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'software-development',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands',
+                    'user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring',
+                    'user-guide/skills/bundled/software-development/software-development-node-inspect-debugger',
+                    'user-guide/skills/bundled/software-development/software-development-plan',
+                    'user-guide/skills/bundled/software-development/software-development-python-debugpy',
+                    'user-guide/skills/bundled/software-development/software-development-requesting-code-review',
+                    'user-guide/skills/bundled/software-development/software-development-spike',
+                    'user-guide/skills/bundled/software-development/software-development-subagent-driven-development',
+                    'user-guide/skills/bundled/software-development/software-development-systematic-debugging',
+                    'user-guide/skills/bundled/software-development/software-development-test-driven-development',
+                    'user-guide/skills/bundled/software-development/software-development-writing-plans',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'yuanbao',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/bundled/yuanbao/yuanbao-yuanbao',
+                  ],
+                },
+              ],
+            },
+            {
+              type: 'category',
+              label: 'Optional',
+              collapsed: true,
+              items: [
+                {
+                  type: 'category',
+                  label: 'autonomous-ai-agents',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox',
+                    'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'blockchain',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/blockchain/blockchain-evm',
+                    'user-guide/skills/optional/blockchain/blockchain-hyperliquid',
+                    'user-guide/skills/optional/blockchain/blockchain-solana',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'communication',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/communication/communication-one-three-one-rule',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'creative',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/creative/creative-blender-mcp',
+                    'user-guide/skills/optional/creative/creative-concept-diagrams',
+                    'user-guide/skills/optional/creative/creative-hyperframes',
+                    'user-guide/skills/optional/creative/creative-kanban-video-orchestrator',
+                    'user-guide/skills/optional/creative/creative-meme-generation',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'devops',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/devops/devops-cli',
+                    'user-guide/skills/optional/devops/devops-docker-management',
+                    'user-guide/skills/optional/devops/devops-watchers',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'dogfood',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'email',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/email/email-agentmail',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'finance',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/finance/finance-3-statement-model',
+                    'user-guide/skills/optional/finance/finance-comps-analysis',
+                    'user-guide/skills/optional/finance/finance-dcf-model',
+                    'user-guide/skills/optional/finance/finance-excel-author',
+                    'user-guide/skills/optional/finance/finance-lbo-model',
+                    'user-guide/skills/optional/finance/finance-merger-model',
+                    'user-guide/skills/optional/finance/finance-pptx-author',
+                    'user-guide/skills/optional/finance/finance-stocks',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'health',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/health/health-fitness-nutrition',
+                    'user-guide/skills/optional/health/health-neuroskill-bci',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'mcp',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/mcp/mcp-fastmcp',
+                    'user-guide/skills/optional/mcp/mcp-mcporter',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'migration',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/migration/migration-openclaw-migration',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'mlops',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/mlops/mlops-accelerate',
+                    'user-guide/skills/optional/mlops/mlops-training-axolotl',
+                    'user-guide/skills/optional/mlops/mlops-chroma',
+                    'user-guide/skills/optional/mlops/mlops-clip',
+                    'user-guide/skills/optional/mlops/mlops-faiss',
+                    'user-guide/skills/optional/mlops/mlops-flash-attention',
+                    'user-guide/skills/optional/mlops/mlops-guidance',
+                    'user-guide/skills/optional/mlops/mlops-huggingface-tokenizers',
+                    'user-guide/skills/optional/mlops/mlops-instructor',
+                    'user-guide/skills/optional/mlops/mlops-lambda-labs',
+                    'user-guide/skills/optional/mlops/mlops-llava',
+                    'user-guide/skills/optional/mlops/mlops-modal',
+                    'user-guide/skills/optional/mlops/mlops-nemo-curator',
+                    'user-guide/skills/optional/mlops/mlops-inference-outlines',
+                    'user-guide/skills/optional/mlops/mlops-peft',
+                    'user-guide/skills/optional/mlops/mlops-pinecone',
+                    'user-guide/skills/optional/mlops/mlops-pytorch-fsdp',
+                    'user-guide/skills/optional/mlops/mlops-pytorch-lightning',
+                    'user-guide/skills/optional/mlops/mlops-qdrant',
+                    'user-guide/skills/optional/mlops/mlops-saelens',
+                    'user-guide/skills/optional/mlops/mlops-simpo',
+                    'user-guide/skills/optional/mlops/mlops-slime',
+                    'user-guide/skills/optional/mlops/mlops-stable-diffusion',
+                    'user-guide/skills/optional/mlops/mlops-tensorrt-llm',
+                    'user-guide/skills/optional/mlops/mlops-torchtitan',
+                    'user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning',
+                    'user-guide/skills/optional/mlops/mlops-training-unsloth',
+                    'user-guide/skills/optional/mlops/mlops-whisper',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'productivity',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/productivity/productivity-canvas',
+                    'user-guide/skills/optional/productivity/productivity-here-now',
+                    'user-guide/skills/optional/productivity/productivity-memento-flashcards',
+                    'user-guide/skills/optional/productivity/productivity-shop-app',
+                    'user-guide/skills/optional/productivity/productivity-shopify',
+                    'user-guide/skills/optional/productivity/productivity-siyuan',
+                    'user-guide/skills/optional/productivity/productivity-telephony',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'research',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/research/research-bioinformatics',
+                    'user-guide/skills/optional/research/research-domain-intel',
+                    'user-guide/skills/optional/research/research-drug-discovery',
+                    'user-guide/skills/optional/research/research-duckduckgo-search',
+                    'user-guide/skills/optional/research/research-gitnexus-explorer',
+                    'user-guide/skills/optional/research/research-parallel-cli',
+                    'user-guide/skills/optional/research/research-qmd',
+                    'user-guide/skills/optional/research/research-scrapling',
+                    'user-guide/skills/optional/research/research-searxng-search',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'security',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/security/security-1password',
+                    'user-guide/skills/optional/security/security-oss-forensics',
+                    'user-guide/skills/optional/security/security-sherlock',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'software-development',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/software-development/software-development-rest-graphql-debug',
+                  ],
+                },
+                {
+                  type: 'category',
+                  label: 'web-development',
+                  collapsed: true,
+                  items: [
+                    'user-guide/skills/optional/web-development/web-development-page-agent',
+                  ],
+                },
+              ],
+            },
           ],
         },
       ],

From ce0e189d3e7185d6c8c6af924a1df23e17c6f85c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 17:11:06 -0700
Subject: [PATCH 093/218] fix(xai-oauth): break entitlement-403
 credential-refresh loop, bump grok-4.3 context to 1M (#26664)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don Piedro's 18-minute hang on grok-4.3 traced to two issues PR #26644
didn't cover:

- _recover_with_credential_pool classifies 403 as FailoverReason.auth
  and calls pool.try_refresh_current().  For xAI OAuth on an
  unsubscribed account, refresh succeeds (mints a new token from the
  same account) but the next API call 403s with the same entitlement
  error.  Result: infinite refresh → retry → 403 loop until Ctrl+C
  (1133s in Don's log).  New _is_entitlement_failure(error_context,
  status_code) detects the subscription-shape body ("do not have an
  active Grok subscription" / "out of available resources" + grok /
  "does not have permission" + grok) and short-circuits recovery so
  _summarize_api_error surfaces PR #26644's friendly hint.

- grok-4.3 resolved to 256k via the grok-4 catch-all in
  DEFAULT_CONTEXT_LENGTHS.  Per docs.x.ai/developers/models/grok-4.3
  the model ships with 1M context.  Add explicit grok-4.3 entry
  before the grok-4 fallback (longest-first substring matching
  ensures grok-4.3 and grok-4.3-latest both land on the new value).

Tests: 8 new (23 total in test_codex_xai_oauth_recovery.py).
E2E verified Don's 100-iteration loop bails out with 0 refresh calls
while genuine auth failures still refresh once and recover.
---
 agent/model_metadata.py                       |   1 +
 run_agent.py                                  |  56 ++++++
 .../test_codex_xai_oauth_recovery.py          | 190 ++++++++++++++++++
 3 files changed, 247 insertions(+)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index a10a01e3cc2..41e229416c9 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = {
     "grok-2-vision": 8192,      # grok-2-vision, -1212, -latest
     "grok-4-fast": 2000000,     # grok-4-fast-(non-)reasoning
     "grok-4.20": 2000000,       # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
+    "grok-4.3": 1000000,        # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai
     "grok-4": 256000,           # grok-4, grok-4-0709
     "grok-3": 131072,           # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
     "grok-2": 131072,           # grok-2, grok-2-1212, grok-2-latest
diff --git a/run_agent.py b/run_agent.py
index 2b20d48ede2..da47ca84e34 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4966,6 +4966,44 @@ class AIAgent:
         trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
         _save_trajectory_to_file(trajectory, self.model, completed)
 
+    @staticmethod
+    def _is_entitlement_failure(
+        error_context: Optional[Dict[str, Any]],
+        status_code: Optional[int],
+    ) -> bool:
+        """Detect subscription/entitlement 403s that masquerade as auth failures.
+
+        Returned True only when the body text matches a known entitlement
+        shape AND the status is 401/403.  Refreshing an OAuth token cannot
+        fix an unsubscribed account, so callers should surface the error
+        instead of looping the credential pool.
+
+        Current matches:
+          * xAI OAuth: "do not have an active Grok subscription" /
+            "out of available resources" / "does not have permission" + "grok"
+
+        Extend here for new providers as we discover them (Anthropic's
+        Claude Max OAuth entitlement errors look distinct enough today that
+        the existing 1M-context-beta branch handles them; revisit if other
+        subscription tiers start producing the same loop signature).
+        """
+        if status_code not in (401, 403, None):
+            return False
+        if not isinstance(error_context, dict):
+            return False
+        message = str(error_context.get("message") or "").lower()
+        reason = str(error_context.get("reason") or "").lower()
+        haystack = f"{message} {reason}"
+        if not haystack.strip():
+            return False
+        if "do not have an active grok subscription" in haystack:
+            return True
+        if "out of available resources" in haystack and "grok" in haystack:
+            return True
+        if "does not have permission" in haystack and "grok" in haystack:
+            return True
+        return False
+
     @staticmethod
     def _decorate_xai_entitlement_error(detail: str) -> str:
         """Append a friendly hint when xAI's OAuth surface returns an
@@ -7551,6 +7589,24 @@ class AIAgent:
             return False, True
 
         if effective_reason == FailoverReason.auth:
+            # Subscription/entitlement 403s look like auth failures on the
+            # wire but refresh cannot fix them — the OAuth token is
+            # already valid; the account simply lacks the entitlement
+            # (e.g. xAI OAuth without SuperGrok/X Premium for grok-4.3).
+            # Without this guard, ``try_refresh_current()`` keeps minting
+            # fresh tokens against the same unsubscribed account and the
+            # main agent loop spins re-issuing the same 403 until the
+            # user Ctrl+C's.  Surface the error instead so the friendly
+            # entitlement hint from ``_summarize_api_error`` can land.
+            if self._is_entitlement_failure(error_context, status_code):
+                logger.info(
+                    "Credential %s — entitlement-shaped 403 from %s; "
+                    "skipping pool refresh (account lacks subscription, "
+                    "not a transient auth failure).",
+                    status_code if status_code is not None else "auth",
+                    self.provider or "provider",
+                )
+                return False, has_retried_429
             refreshed = pool.try_refresh_current()
             if refreshed is not None:
                 logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
index 0f3603d2ca7..7c675f22225 100644
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -349,3 +349,193 @@ def test_codex_transport_native_codex_still_replays_reasoning_in_input():
     assert reasoning_items[0]["encrypted_content"] == "enc_blob"
     # Native Codex still asks for encrypted_content back.
     assert "reasoning.encrypted_content" in kwargs.get("include", [])
+
+
+# ---------------------------------------------------------------------------
+# Fix D: entitlement 403 must NOT trigger credential-pool refresh loop
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "message",
+    [
+        # The exact wire text RaidenTyler and Don Piedro captured.
+        "You have either run out of available resources or do not have an "
+        "active Grok subscription. Manage at https://grok.com",
+        # Permission-style variant from the same 403 body.
+        "The caller does not have permission to execute the specified "
+        "operation for grok-4.3",
+    ],
+)
+def test_is_entitlement_failure_matches_real_xai_bodies(message):
+    from run_agent import AIAgent
+
+    assert AIAgent._is_entitlement_failure(
+        {"message": message, "reason": "permission_denied"},
+        403,
+    )
+
+
+def test_is_entitlement_failure_false_for_status_other_than_401_403():
+    """200/429/500 must never be classified as entitlement, even if body matches."""
+    from run_agent import AIAgent
+
+    body = {
+        "message": "do not have an active Grok subscription",
+    }
+    assert not AIAgent._is_entitlement_failure(body, 500)
+    assert not AIAgent._is_entitlement_failure(body, 429)
+    assert not AIAgent._is_entitlement_failure(body, 200)
+
+
+def test_is_entitlement_failure_false_for_unrelated_auth_errors():
+    """A real auth failure (expired token, wrong key) must keep refreshing."""
+    from run_agent import AIAgent
+
+    # Generic Anthropic-style auth failure
+    assert not AIAgent._is_entitlement_failure(
+        {"message": "Invalid API key", "reason": "authentication_error"},
+        401,
+    )
+    # OAuth token expired
+    assert not AIAgent._is_entitlement_failure(
+        {"message": "Token has expired", "reason": "unauthorized"},
+        401,
+    )
+    # Empty context
+    assert not AIAgent._is_entitlement_failure({}, 401)
+    assert not AIAgent._is_entitlement_failure(None, 401)
+
+
+def test_recover_with_credential_pool_skips_refresh_on_entitlement_403():
+    """The recovery path must NOT call pool.try_refresh_current() on entitlement 403.
+
+    Before the fix, an unsubscribed xAI OAuth account would burn the agent
+    loop indefinitely: refresh → 403 → refresh → 403, infinitely.  With
+    the entitlement guard, recovery returns False so the error surfaces
+    normally with the friendly hint from _summarize_api_error.
+    """
+    from run_agent import AIAgent
+    from agent.error_classifier import FailoverReason
+
+    agent = _make_codex_agent()
+
+    # Wire a fake credential pool that records refresh attempts.
+    refresh_calls = {"n": 0}
+
+    class _FakePool:
+        def try_refresh_current(self):
+            refresh_calls["n"] += 1
+            return MagicMock(id="should_not_be_called")
+
+        def mark_exhausted_and_rotate(self, **_kwargs):
+            return None
+
+        def has_available(self):
+            return False
+
+    agent._credential_pool = _FakePool()
+
+    error_context = {
+        "reason": "The caller does not have permission to execute the specified operation",
+        "message": "You have either run out of available resources or do not have an "
+                   "active Grok subscription. Manage at https://grok.com",
+    }
+
+    recovered, _retried_429 = agent._recover_with_credential_pool(
+        status_code=403,
+        has_retried_429=False,
+        classified_reason=FailoverReason.auth,
+        error_context=error_context,
+    )
+
+    assert recovered is False, "Entitlement 403 must surface, not silently recover"
+    assert refresh_calls["n"] == 0, "try_refresh_current must NOT be called on entitlement 403"
+
+
+def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure():
+    """Regression guard: legitimate auth errors must still trigger refresh."""
+    from run_agent import AIAgent
+    from agent.error_classifier import FailoverReason
+
+    agent = _make_codex_agent()
+
+    refresh_calls = {"n": 0}
+
+    class _FakePool:
+        def try_refresh_current(self):
+            refresh_calls["n"] += 1
+            # Return a fake refreshed entry — semantically "refresh worked"
+            entry = MagicMock()
+            entry.id = "entry_refreshed"
+            return entry
+
+        def mark_exhausted_and_rotate(self, **_kwargs):
+            return None
+
+        def has_available(self):
+            return False
+
+    agent._credential_pool = _FakePool()
+    # _swap_credential is called by the recovery path — stub it out
+    agent._swap_credential = MagicMock()
+
+    error_context = {
+        "reason": "authentication_error",
+        "message": "Invalid API key",
+    }
+
+    recovered, _retried_429 = agent._recover_with_credential_pool(
+        status_code=401,
+        has_retried_429=False,
+        classified_reason=FailoverReason.auth,
+        error_context=error_context,
+    )
+
+    assert recovered is True, "Genuine auth failure must still recover via refresh"
+    assert refresh_calls["n"] == 1
+
+
+# ---------------------------------------------------------------------------
+# Fix E: grok-4.3 context length must be 1M, not 256K
+# ---------------------------------------------------------------------------
+
+
+def test_grok_4_3_context_length_is_1m():
+    """grok-4.3 ships with 1M context per docs.x.ai/developers/models/grok-4.3.
+
+    Hermes' substring-match fallback used to return 256k (from the
+    "grok-4" catch-all) which under-reported the model's real capacity.
+    """
+    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
+
+    # The entry exists with the expected value.
+    assert DEFAULT_CONTEXT_LENGTHS["grok-4.3"] == 1_000_000
+
+    # And longest-first substring matching resolves grok-4.3 and
+    # grok-4.3-latest to the new value, NOT the grok-4 catch-all.
+    for slug in ("grok-4.3", "grok-4.3-latest"):
+        matched_key = max(
+            (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()),
+            key=len,
+        )
+        assert matched_key == "grok-4.3", (
+            f"Expected longest-first match to land on grok-4.3 for {slug}, "
+            f"got {matched_key}"
+        )
+        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 1_000_000
+
+
+def test_grok_4_still_resolves_to_256k():
+    """Regression guard: grok-4 (non-.3) must still resolve to 256k."""
+    from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
+
+    for slug in ("grok-4", "grok-4-0709"):
+        matched_key = max(
+            (k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()),
+            key=len,
+        )
+        # grok-4-0709 contains "grok-4" but not "grok-4.3"; matched key
+        # must be "grok-4" (or a more specific variant family if one is
+        # ever added).  The 256k contract must hold.
+        assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000

From 9818b9a1acb915971d835d1faa85949e9f7a87a5 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 17:15:22 -0700
Subject: [PATCH 094/218] fix(xai-oauth): rewrite entitlement-403 hint to not
 accuse subscribers (#26666)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #26644 confidently told users "xAI OAuth account lacks SuperGrok /
X Premium entitlement" on any 403 from xAI's permission-denied surface.
But that body is returned for at least four distinct causes that
Hermes cannot distinguish from the wire:

  * Account has no Grok subscription at all
  * Account has SuperGrok but the tier doesn't include the requested
    model (e.g. grok-4.3 needs SuperGrok Heavy)
  * Monthly quota for the subscribed tier is exhausted
  * SuperGrok is active but the API access add-on isn't enabled

Don Piedro pushed back that he IS subscribed yet still hit this.
Picking the worst-case interpretation ("you're not subscribed")
reads as wrong and insulting to subscribers, and points them at a
fix they already did.

New wording lists all 4 possibilities and points at
https://grok.com/?_s=usage where the user can check which applies.

The detection logic and credential-pool short-circuit (PR #26664)
are unchanged — only the user-facing wording is rephrased.
---
 run_agent.py                                  | 42 ++++++++++++------
 .../test_codex_xai_oauth_recovery.py          | 44 +++++++++++++++++--
 2 files changed, 69 insertions(+), 17 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index da47ca84e34..da05e7e8239 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5006,23 +5006,35 @@ class AIAgent:
 
     @staticmethod
     def _decorate_xai_entitlement_error(detail: str) -> str:
-        """Append a friendly hint when xAI's OAuth surface returns an
-        entitlement-shaped error.
+        """Append a neutral hint when xAI's OAuth surface returns the
+        permission-denied 403.
 
-        xAI's ``/v1/responses`` endpoint replies to OAuth tokens that lack a
-        SuperGrok / X Premium subscription with HTTP 403 carrying a body like::
+        xAI's ``/v1/responses`` endpoint replies to several distinct failure
+        modes with the SAME body::
 
             {"code": "The caller does not have permission to execute the
              specified operation", "error": "You have either run out of
              available resources or do not have an active Grok subscription.
-             Manage subscriptions at https://grok.com/..."}
+             Manage subscriptions at https://grok.com/?_s=usage or subscribe
+             at https://grok.com/supergrok"}
 
-        The raw text is useful but the action the user needs to take (subscribe
-        on grok.com, or switch providers with ``/model``) isn't obvious from
-        the wire format.  Detect the entitlement shape and append a hint.
+        That body covers at least four real causes we cannot distinguish
+        without more info from xAI:
 
-        Matched once per detail string — won't double-decorate if the upstream
-        already concatenated the same text.
+          * Account has no Grok subscription at all
+          * Account has SuperGrok but the tier doesn't include the requested
+            model (e.g. grok-4.3 needs SuperGrok Heavy)
+          * Monthly quota for the subscribed tier is exhausted (the
+            ``?_s=usage`` URL hints at this)
+          * SuperGrok is active but the API access add-on isn't enabled
+
+        Picking one ("you're not subscribed") is wrong for the other three
+        and reads as insulting to subscribers.  Surface the raw xAI text
+        verbatim and point at https://grok.com/?_s=usage where the user
+        can see WHICH of those four it is.
+
+        Matched once per detail string — won't double-decorate if the
+        upstream already concatenated the same text.
         """
         if not detail:
             return detail
@@ -5035,11 +5047,15 @@ class AIAgent:
         if not is_entitlement:
             return detail
         hint = (
-            " — xAI OAuth account lacks SuperGrok / X Premium entitlement for "
-            "this model. Subscribe at https://grok.com or run `/model` to "
+            " — xAI rejected the request on this OAuth account. Could be a "
+            "missing subscription, a tier that doesn't include this model, an "
+            "exhausted quota, or API access not enabled. Check "
+            "https://grok.com/?_s=usage to see which, or run `/model` to "
             "switch providers."
         )
-        if hint.strip() in detail:
+        # Idempotency: detect prior decoration by a substring unique to the
+        # hint (not present in xAI's own body text).
+        if "Could be a missing subscription" in detail:
             return detail
         return f"{detail}{hint}"
 
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
index 7c675f22225..c64f46eea09 100644
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -163,7 +163,12 @@ def test_codex_stream_postlude_error_still_falls_back():
 
 
 def test_summarize_api_error_decorates_xai_entitlement_403():
-    """xAI's OAuth 403 must end with the subscribe-or-switch hint."""
+    """xAI's OAuth 403 must end with the neutral 4-cause hint.
+
+    Wording is deliberately ambiguous because xAI returns the SAME body for:
+    no subscription, wrong tier, exhausted quota, or API access not enabled.
+    Picking one (e.g. "you're not subscribed") would insult subscribers.
+    """
     from run_agent import AIAgent
 
     error = RuntimeError(
@@ -173,10 +178,39 @@ def test_summarize_api_error_decorates_xai_entitlement_403():
         "subscriptions at https://grok.com'}"
     )
     summary = AIAgent._summarize_api_error(error)
+    # The original xAI text must survive — it's still useful diagnostic info.
     assert "do not have an active Grok subscription" in summary
-    assert "SuperGrok" in summary
+    # The hint must NOT confidently assert "lacks subscription"; it must
+    # acknowledge the 4 possible causes.
+    assert "Could be a missing subscription" in summary
+    assert "tier that doesn't include this model" in summary
+    assert "exhausted quota" in summary
+    assert "API access not enabled" in summary
+    # The hint must point at the usage page where the user can verify which.
+    assert "https://grok.com/?_s=usage" in summary
+    # Switching providers is still a valid escape hatch.
     assert "/model" in summary
-    assert "https://grok.com" in summary
+
+
+def test_summarize_api_error_does_not_accuse_subscribers():
+    """Hint must not confidently say the user has no subscription.
+
+    Don Piedro reported his subscription is active. The hint must not
+    contradict him — it must list all 4 possible causes and let him
+    check which one applies.
+    """
+    from run_agent import AIAgent
+
+    error = RuntimeError(
+        "HTTP 403: do not have an active Grok subscription"
+    )
+    summary = AIAgent._summarize_api_error(error)
+    # MUST NOT contain language that assumes the user is unsubscribed.
+    assert "lacks SuperGrok" not in summary
+    assert "lacks subscription" not in summary
+    assert "your account doesn't have" not in summary.lower()
+    # MUST contain the neutral framing.
+    assert "Could be" in summary or "could be" in summary
 
 
 def test_summarize_api_error_decorates_xai_body_message():
@@ -197,7 +231,7 @@ def test_summarize_api_error_decorates_xai_body_message():
 
     summary = AIAgent._summarize_api_error(_XaiErr("403"))
     assert "HTTP 403" in summary
-    assert "SuperGrok / X Premium" in summary
+    assert "Could be a missing subscription" in summary
 
 
 def test_summarize_api_error_idempotent_for_entitlement_hint():
@@ -208,6 +242,8 @@ def test_summarize_api_error_idempotent_for_entitlement_hint():
     once = AIAgent._decorate_xai_entitlement_error(raw)
     twice = AIAgent._decorate_xai_entitlement_error(once)
     assert once == twice
+    # Sanity: the hint did fire on the first pass.
+    assert "Could be a missing subscription" in once
 
 
 def test_summarize_api_error_passes_through_unrelated_errors():

From 6784c80794bfd3cc40aae7f7d9f1a59876de7799 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 17:23:33 -0700
Subject: [PATCH 095/218] fix(xai-oauth): lead entitlement-403 hint with X
 Premium+ gotcha (#26672)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The #1 confusing cause of the xAI 403 (per Teknium): X Premium+
subscribers see Grok inside the X app and assume API access is
included.  It is NOT — only standalone SuperGrok subscribers can use
xai-oauth with Hermes today.  Without calling this out, every Premium+
user hits the 403 with no idea why.

PR #26666's neutral 4-cause list was correct but buried the most
common cause.  Lead with the Premium+ gotcha, then list the other
possibilities (no subscription, wrong tier, exhausted quota) as
fallbacks.  Same neutral framing — does not accuse anyone of being
unsubscribed.
---
 run_agent.py                                  | 38 ++++++++--------
 .../test_codex_xai_oauth_recovery.py          | 44 ++++++++++---------
 2 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index da05e7e8239..85c1128d68e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5018,20 +5018,21 @@ class AIAgent:
              Manage subscriptions at https://grok.com/?_s=usage or subscribe
              at https://grok.com/supergrok"}
 
-        That body covers at least four real causes we cannot distinguish
-        without more info from xAI:
+        That body covers several real causes we cannot distinguish without
+        more info from xAI.  The most common (and least obvious) one is
+        that **X Premium+ does NOT include API access** — only standalone
+        SuperGrok subscribers can use Hermes against xai-oauth.  Lots of
+        users see Grok in their X app, assume it works here too, and hit
+        this 403 with no idea why.  Lead the hint with that.
 
-          * Account has no Grok subscription at all
-          * Account has SuperGrok but the tier doesn't include the requested
-            model (e.g. grok-4.3 needs SuperGrok Heavy)
-          * Monthly quota for the subscribed tier is exhausted (the
-            ``?_s=usage`` URL hints at this)
-          * SuperGrok is active but the API access add-on isn't enabled
+        Other possible causes:
+          * No Grok subscription at all
+          * SuperGrok tier doesn't include the requested model (e.g.
+            grok-4.3 may need a higher tier)
+          * Monthly quota exhausted (the ``?_s=usage`` URL hints at this)
 
-        Picking one ("you're not subscribed") is wrong for the other three
-        and reads as insulting to subscribers.  Surface the raw xAI text
-        verbatim and point at https://grok.com/?_s=usage where the user
-        can see WHICH of those four it is.
+        Surface the raw xAI text verbatim and point at
+        https://grok.com/?_s=usage where the user can see WHICH applies.
 
         Matched once per detail string — won't double-decorate if the
         upstream already concatenated the same text.
@@ -5047,15 +5048,16 @@ class AIAgent:
         if not is_entitlement:
             return detail
         hint = (
-            " — xAI rejected the request on this OAuth account. Could be a "
-            "missing subscription, a tier that doesn't include this model, an "
-            "exhausted quota, or API access not enabled. Check "
-            "https://grok.com/?_s=usage to see which, or run `/model` to "
-            "switch providers."
+            " — xAI rejected this OAuth account. NOTE: X Premium+ does NOT "
+            "include xAI API access — only standalone SuperGrok subscribers "
+            "can use this provider. Other possible causes: no Grok "
+            "subscription, your tier doesn't include this model, or your "
+            "quota is exhausted. Check https://grok.com/?_s=usage to see "
+            "which, or run `/model` to switch providers."
         )
         # Idempotency: detect prior decoration by a substring unique to the
         # hint (not present in xAI's own body text).
-        if "Could be a missing subscription" in detail:
+        if "X Premium+ does NOT include" in detail:
             return detail
         return f"{detail}{hint}"
 
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
index c64f46eea09..9192d50695b 100644
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -163,11 +163,13 @@ def test_codex_stream_postlude_error_still_falls_back():
 
 
 def test_summarize_api_error_decorates_xai_entitlement_403():
-    """xAI's OAuth 403 must end with the neutral 4-cause hint.
+    """xAI's OAuth 403 must surface the X Premium+ gotcha + neutral causes.
 
-    Wording is deliberately ambiguous because xAI returns the SAME body for:
-    no subscription, wrong tier, exhausted quota, or API access not enabled.
-    Picking one (e.g. "you're not subscribed") would insult subscribers.
+    Wording deliberately leads with the X Premium+ gotcha because that's
+    the #1 confusing case: people see Grok in their X app, assume it
+    works here too, and hit this 403 with no idea API access is a
+    separate SKU.  Other causes (no subscription, wrong tier, exhausted
+    quota) follow.
     """
     from run_agent import AIAgent
 
@@ -180,13 +182,15 @@ def test_summarize_api_error_decorates_xai_entitlement_403():
     summary = AIAgent._summarize_api_error(error)
     # The original xAI text must survive — it's still useful diagnostic info.
     assert "do not have an active Grok subscription" in summary
-    # The hint must NOT confidently assert "lacks subscription"; it must
-    # acknowledge the 4 possible causes.
-    assert "Could be a missing subscription" in summary
-    assert "tier that doesn't include this model" in summary
-    assert "exhausted quota" in summary
-    assert "API access not enabled" in summary
-    # The hint must point at the usage page where the user can verify which.
+    # The hint MUST lead with the X Premium+ gotcha (most likely cause
+    # for users who think they're subscribed).
+    assert "X Premium+ does NOT include" in summary
+    assert "standalone SuperGrok subscribers" in summary
+    # Other causes still listed.
+    assert "no Grok subscription" in summary
+    assert "tier doesn't include this model" in summary
+    assert "quota is exhausted" in summary
+    # The hint must point at the usage page where the user can verify.
     assert "https://grok.com/?_s=usage" in summary
     # Switching providers is still a valid escape hatch.
     assert "/model" in summary
@@ -196,8 +200,9 @@ def test_summarize_api_error_does_not_accuse_subscribers():
     """Hint must not confidently say the user has no subscription.
 
     Don Piedro reported his subscription is active. The hint must not
-    contradict him — it must list all 4 possible causes and let him
-    check which one applies.
+    contradict him — leading with the X Premium+ gotcha gives subscribers
+    a plausible reason ("oh, I'm on Premium+ not pure SuperGrok") instead
+    of accusing them of lying about having a subscription.
     """
     from run_agent import AIAgent
 
@@ -205,12 +210,11 @@ def test_summarize_api_error_does_not_accuse_subscribers():
         "HTTP 403: do not have an active Grok subscription"
     )
     summary = AIAgent._summarize_api_error(error)
-    # MUST NOT contain language that assumes the user is unsubscribed.
+    # MUST NOT contain language that flatly assumes the user is unsubscribed.
     assert "lacks SuperGrok" not in summary
-    assert "lacks subscription" not in summary
-    assert "your account doesn't have" not in summary.lower()
-    # MUST contain the neutral framing.
-    assert "Could be" in summary or "could be" in summary
+    assert "you are not subscribed" not in summary.lower()
+    # MUST lead with the most-likely-but-non-accusatory cause.
+    assert "X Premium+ does NOT include" in summary
 
 
 def test_summarize_api_error_decorates_xai_body_message():
@@ -231,7 +235,7 @@ def test_summarize_api_error_decorates_xai_body_message():
 
     summary = AIAgent._summarize_api_error(_XaiErr("403"))
     assert "HTTP 403" in summary
-    assert "Could be a missing subscription" in summary
+    assert "X Premium+ does NOT include" in summary
 
 
 def test_summarize_api_error_idempotent_for_entitlement_hint():
@@ -243,7 +247,7 @@ def test_summarize_api_error_idempotent_for_entitlement_hint():
     twice = AIAgent._decorate_xai_entitlement_error(once)
     assert once == twice
     # Sanity: the hint did fire on the first pass.
-    assert "Could be a missing subscription" in once
+    assert "X Premium+ does NOT include" in once
 
 
 def test_summarize_api_error_passes_through_unrelated_errors():

From 566d8f0d75049e5e4e4e3e3fde7f8c766ae235d6 Mon Sep 17 00:00:00 2001
From: brooklyn! <brooklyn.bb.nicholson@gmail.com>
Date: Fri, 15 May 2026 20:08:24 -0500
Subject: [PATCH 096/218] fix(tui): keep DECSTBM scroll region off bottom row
 (#26683)

Avoid shifting the terminal's last visible row in the alt-screen DECSTBM fast path, which can leave transient scroll bleed/discoloration artifacts around the status lane until a repaint. Add regression tests to preserve the fast path when safe and skip it when the hint touches the bottom row.
---
 .../hermes-ink/src/ink/log-update.test.ts     | 42 +++++++++++++++++++
 .../packages/hermes-ink/src/ink/log-update.ts |  5 ++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts
index 35c99f7e0a2..a11a028e771 100644
--- a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts
+++ b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts
@@ -42,6 +42,8 @@ const stdoutOnly = (diff: ReturnType<LogUpdate['render']>) =>
     .map(p => (p as { type: 'stdout'; content: string }).content)
     .join('')
 
+const hasDecstbm = (text: string) => /\x1b\[\d+;\d+r/.test(text)
+
 describe('LogUpdate.render diff contract', () => {
   it('emits only changed cells when most rows match', () => {
     const w = 20
@@ -154,4 +156,44 @@ describe('LogUpdate.render diff contract', () => {
     expect(diff.some(p => p.type === 'clearTerminal')).toBe(true)
     expect(stdoutOnly(diff)).toContain('timer2s')
   })
+
+  it('keeps DECSTBM fast-path when scroll region stays above bottom row', () => {
+    const w = 12
+    const h = 6
+    const prev = mkScreen(w, h)
+    const next = mkScreen(w, h)
+
+    paint(prev, 1, 'row one')
+    paint(next, 1, 'row one')
+
+    const prevFrame = mkFrame(prev, w, h)
+    const nextFrame: Frame = {
+      ...mkFrame(next, w, h),
+      scrollHint: { top: 1, bottom: 4, delta: 1 }
+    }
+    const log = new LogUpdate({ isTTY: true, stylePool })
+    const diff = log.render(prevFrame, nextFrame, true, true)
+
+    expect(hasDecstbm(stdoutOnly(diff))).toBe(true)
+  })
+
+  it('skips DECSTBM when scroll region touches the bottom row', () => {
+    const w = 12
+    const h = 6
+    const prev = mkScreen(w, h)
+    const next = mkScreen(w, h)
+
+    paint(prev, 1, 'row one')
+    paint(next, 1, 'row one')
+
+    const prevFrame = mkFrame(prev, w, h)
+    const nextFrame: Frame = {
+      ...mkFrame(next, w, h),
+      scrollHint: { top: 1, bottom: 5, delta: 1 }
+    }
+    const log = new LogUpdate({ isTTY: true, stylePool })
+    const diff = log.render(prevFrame, nextFrame, true, true)
+
+    expect(hasDecstbm(stdoutOnly(diff))).toBe(false)
+  })
 })
diff --git a/ui-tui/packages/hermes-ink/src/ink/log-update.ts b/ui-tui/packages/hermes-ink/src/ink/log-update.ts
index 9a377c2c6f6..0f36d4641e7 100644
--- a/ui-tui/packages/hermes-ink/src/ink/log-update.ts
+++ b/ui-tui/packages/hermes-ink/src/ink/log-update.ts
@@ -175,7 +175,10 @@ export class LogUpdate {
     if (altScreen && next.scrollHint && decstbmSafe) {
       const { top, bottom, delta } = next.scrollHint
 
-      if (top >= 0 && bottom < prev.screen.height && bottom < next.screen.height) {
+      // Keep DECSTBM away from the terminal's last visible row. In alt-screen
+      // layouts we reserve that lane for status/cursor parking, and scrolling
+      // it can leave transient ghosting/bleed artifacts until a later repaint.
+      if (top >= 0 && bottom < prev.screen.height - 1 && bottom < next.screen.height - 1) {
         shiftRows(prev.screen, top, bottom, delta)
         scrollPatch = [
           {

From 006937f7d062f7f1dd830aa16476ce962bd30445 Mon Sep 17 00:00:00 2001
From: brooklyn! <brooklyn.bb.nicholson@gmail.com>
Date: Fri, 15 May 2026 20:19:02 -0500
Subject: [PATCH 097/218] fix(tui): handle timeout/error subagent statuses in
 /agents (#26687)

Accept delegation timeout/error statuses in the TUI subagent model, normalize unknown status strings defensively, and harden /agents overlay rendering/sorting so unknown statuses cannot crash glyph/color lookup. Add regression tests for live event normalization and disk snapshot replay.
---
 .../createGatewayEventHandler.test.ts         | 55 +++++++++++++++++++
 .../src/__tests__/spawnHistoryStore.test.ts   | 46 ++++++++++++++++
 ui-tui/src/app/createGatewayEventHandler.ts   | 29 ++++++++--
 ui-tui/src/app/spawnHistoryStore.ts           | 24 +++++++-
 ui-tui/src/components/agentsOverlay.tsx       | 19 +++++--
 ui-tui/src/components/thinking.tsx            |  6 +-
 ui-tui/src/gatewayTypes.ts                    |  4 +-
 ui-tui/src/types.ts                           |  4 +-
 8 files changed, 173 insertions(+), 14 deletions(-)
 create mode 100644 ui-tui/src/__tests__/spawnHistoryStore.test.ts

diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
index d74976d195e..cd278eecdf9 100644
--- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
@@ -737,6 +737,61 @@ describe('createGatewayEventHandler', () => {
     expect(getTurnState().activity).toMatchObject([{ text: 'boom', tone: 'error' }])
   })
 
+  it('accepts timeout/error subagent terminal statuses and ignores stale live events', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+    onEvent({
+      payload: { goal: 'timeout child', subagent_id: 'sa-timeout', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+    onEvent({
+      payload: { goal: 'timeout child', status: 'timeout', subagent_id: 'sa-timeout', task_index: 0 },
+      type: 'subagent.complete'
+    } as any)
+
+    expect(getTurnState().subagents.find(s => s.id === 'sa-timeout')?.status).toBe('timeout')
+
+    // Late start/spawn updates must not clobber terminal timeout/error states.
+    onEvent({
+      payload: { goal: 'timeout child', subagent_id: 'sa-timeout', task_index: 0 },
+      type: 'subagent.start'
+    } as any)
+    onEvent({
+      payload: { goal: 'timeout child', subagent_id: 'sa-timeout', task_index: 0 },
+      type: 'subagent.spawn_requested'
+    } as any)
+
+    expect(getTurnState().subagents.find(s => s.id === 'sa-timeout')?.status).toBe('timeout')
+
+    onEvent({
+      payload: { goal: 'error child', subagent_id: 'sa-error', task_index: 1 },
+      type: 'subagent.start'
+    } as any)
+    onEvent({
+      payload: { goal: 'error child', status: 'error', subagent_id: 'sa-error', task_index: 1 },
+      type: 'subagent.complete'
+    } as any)
+
+    expect(getTurnState().subagents.find(s => s.id === 'sa-error')?.status).toBe('error')
+  })
+
+  it('normalizes unknown subagent.complete statuses to completed', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+    onEvent({
+      payload: { goal: 'weird child', subagent_id: 'sa-weird', task_index: 2 },
+      type: 'subagent.start'
+    } as any)
+    onEvent({
+      payload: { goal: 'weird child', status: 'mystery_status', subagent_id: 'sa-weird', task_index: 2 },
+      type: 'subagent.complete'
+    } as any)
+
+    expect(getTurnState().subagents.find(s => s.id === 'sa-weird')?.status).toBe('completed')
+  })
+
   it('drops stale reasoning/tool/todos events after ctrl-c until the next message starts', () => {
     // Repro for the discord report: ctrl-c interrupts, but late reasoning/tool
     // events from the still-winding-down agent loop kept populating the UI for
diff --git a/ui-tui/src/__tests__/spawnHistoryStore.test.ts b/ui-tui/src/__tests__/spawnHistoryStore.test.ts
new file mode 100644
index 00000000000..544280e5c42
--- /dev/null
+++ b/ui-tui/src/__tests__/spawnHistoryStore.test.ts
@@ -0,0 +1,46 @@
+import { beforeEach, describe, expect, it } from 'vitest'
+
+import { clearSpawnHistory, getSpawnHistory, pushDiskSnapshot } from '../app/spawnHistoryStore.js'
+
+describe('spawnHistoryStore status normalization', () => {
+  beforeEach(() => {
+    clearSpawnHistory()
+  })
+
+  it('keeps timeout/error statuses from disk snapshots', () => {
+    pushDiskSnapshot(
+      {
+        finished_at: 1_700_000_001,
+        label: 'status test',
+        session_id: 'sess-1',
+        started_at: 1_700_000_000,
+        subagents: [
+          { goal: 'timeout child', id: 'sa-timeout', index: 0, status: 'timeout' },
+          { goal: 'error child', id: 'sa-error', index: 1, status: 'error' }
+        ]
+      },
+      '/tmp/snap-timeout-error.json'
+    )
+
+    const statuses = getSpawnHistory()[0]?.subagents.map(s => s.status)
+
+    expect(statuses).toEqual(['timeout', 'error'])
+  })
+
+  it('falls back unknown disk statuses to completed', () => {
+    pushDiskSnapshot(
+      {
+        finished_at: 1_700_000_011,
+        label: 'unknown status test',
+        session_id: 'sess-2',
+        started_at: 1_700_000_010,
+        subagents: [{ goal: 'mystery child', id: 'sa-unknown', index: 0, status: 'mystery_status' }]
+      },
+      '/tmp/snap-unknown.json'
+    )
+
+    const status = getSpawnHistory()[0]?.subagents[0]?.status
+
+    expect(status).toBe('completed')
+  })
+})
diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts
index 555a35e8afe..ca269a131b4 100644
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -13,7 +13,7 @@ import { rpcErrorMessage } from '../lib/rpc.js'
 import { topLevelSubagents } from '../lib/subagentTree.js'
 import { formatToolCall, stripAnsi } from '../lib/text.js'
 import { fromSkin } from '../theme.js'
-import type { Msg, SubagentProgress } from '../types.js'
+import type { Msg, SubagentProgress, SubagentStatus } from '../types.js'
 
 import { applyDelegationStatus, getDelegationState } from './delegationStore.js'
 import type { GatewayEventHandlerContext } from './interfaces.js'
@@ -54,6 +54,26 @@ const pushThinking = pushUnique(6)
 const pushNote = pushUnique(6)
 const pushTool = pushUnique(8)
 
+const KNOWN_SUBAGENT_STATUSES = new Set<SubagentStatus>([
+  'completed',
+  'error',
+  'failed',
+  'interrupted',
+  'queued',
+  'running',
+  'timeout'
+])
+
+const normalizeSubagentStatus = (status: unknown, fallback: SubagentStatus): SubagentStatus => {
+  if (typeof status !== 'string') {
+    return fallback
+  }
+
+  const normalized = status.toLowerCase() as SubagentStatus
+
+  return KNOWN_SUBAGENT_STATUSES.has(normalized) ? normalized : fallback
+}
+
 export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: GatewayEvent) => void {
   const { rpc } = ctx.gateway
   const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
@@ -180,8 +200,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
 
   // Terminal statuses are never overwritten by late-arriving live events —
   // otherwise a stale `subagent.start` / `spawn_requested` can clobber a
-  // `failed` or `interrupted` terminal state (Copilot review #14045).
-  const isTerminalStatus = (s: SubagentProgress['status']) => s === 'completed' || s === 'failed' || s === 'interrupted'
+  // terminal state from complete (failed/interrupted/timeout/error).
+  const isTerminalStatus = (s: SubagentProgress['status']) =>
+    s === 'completed' || s === 'error' || s === 'failed' || s === 'interrupted' || s === 'timeout'
 
   const keepTerminalElseRunning = (s: SubagentProgress['status']) => (isTerminalStatus(s) ? s : 'running')
 
@@ -648,7 +669,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
           ev.payload,
           c => ({
             durationSeconds: ev.payload.duration_seconds ?? c.durationSeconds,
-            status: ev.payload.status ?? 'completed',
+            status: normalizeSubagentStatus(ev.payload.status, 'completed'),
             summary: ev.payload.summary || ev.payload.text || c.summary
           }),
           { createIfMissing: false }
diff --git a/ui-tui/src/app/spawnHistoryStore.ts b/ui-tui/src/app/spawnHistoryStore.ts
index 9adb2b59cd0..ec36148403d 100644
--- a/ui-tui/src/app/spawnHistoryStore.ts
+++ b/ui-tui/src/app/spawnHistoryStore.ts
@@ -1,7 +1,7 @@
 import { atom } from 'nanostores'
 
 import type { SpawnTreeLoadResponse } from '../gatewayTypes.js'
-import type { SubagentProgress } from '../types.js'
+import type { SubagentProgress, SubagentStatus } from '../types.js'
 
 export interface SpawnSnapshot {
   finishedAt: number
@@ -21,6 +21,26 @@ export interface SpawnDiffPair {
 
 const HISTORY_LIMIT = 10
 
+const KNOWN_SUBAGENT_STATUSES = new Set<SubagentStatus>([
+  'completed',
+  'error',
+  'failed',
+  'interrupted',
+  'queued',
+  'running',
+  'timeout'
+])
+
+const normalizeSubagentStatus = (status: unknown, fallback: SubagentStatus): SubagentStatus => {
+  if (typeof status !== 'string') {
+    return fallback
+  }
+
+  const normalized = status.toLowerCase() as SubagentStatus
+
+  return KNOWN_SUBAGENT_STATUSES.has(normalized) ? normalized : fallback
+}
+
 export const $spawnHistory = atom<SpawnSnapshot[]>([])
 export const $spawnDiff = atom<null | SpawnDiffPair>(null)
 
@@ -128,7 +148,7 @@ function normaliseSubagent(raw: unknown): SubagentProgress {
     parentId: s(o.parentId) ?? null,
     reasoningTokens: n(o.reasoningTokens),
     startedAt: n(o.startedAt),
-    status: (s(o.status) as SubagentProgress['status']) ?? 'completed',
+    status: normalizeSubagentStatus(o.status, 'completed'),
     summary: s(o.summary),
     taskCount: typeof o.taskCount === 'number' ? o.taskCount : 1,
     thinking: (arr<string>(o.thinking) ?? []).filter(x => typeof x === 'string'),
diff --git a/ui-tui/src/components/agentsOverlay.tsx b/ui-tui/src/components/agentsOverlay.tsx
index a1b349827cc..497230c3934 100644
--- a/ui-tui/src/components/agentsOverlay.tsx
+++ b/ui-tui/src/components/agentsOverlay.tsx
@@ -57,25 +57,33 @@ const FILTER_LABEL: Record<FilterMode, string> = {
 }
 
 const STATUS_RANK: Record<Status, number> = {
+  error: 0,
   failed: 0,
   interrupted: 1,
+  timeout: 1,
   running: 2,
   queued: 3,
   completed: 4
 }
 
+const statusRank = (status: string): number => STATUS_RANK[status as Status] ?? STATUS_RANK.error
+
 const SORT_COMPARATORS: Record<SortMode, (a: SubagentNode, b: SubagentNode) => number> = {
   'depth-first': (a, b) => a.item.depth - b.item.depth || a.item.index - b.item.index,
   'tools-desc': (a, b) => b.aggregate.totalTools - a.aggregate.totalTools,
   'duration-desc': (a, b) => b.aggregate.totalDuration - a.aggregate.totalDuration,
-  status: (a, b) => STATUS_RANK[a.item.status] - STATUS_RANK[b.item.status]
+  status: (a, b) => statusRank(a.item.status) - statusRank(b.item.status)
 }
 
 const FILTER_PREDICATES: Record<FilterMode, (n: SubagentNode) => boolean> = {
   all: () => true,
   leaf: n => n.children.length === 0,
   running: n => n.item.status === 'running' || n.item.status === 'queued',
-  failed: n => n.item.status === 'failed' || n.item.status === 'interrupted'
+  failed: n =>
+    n.item.status === 'error' ||
+    n.item.status === 'failed' ||
+    n.item.status === 'interrupted' ||
+    n.item.status === 'timeout'
 }
 
 const STATUS_GLYPH: Record<Status, { color: (t: Theme) => string; glyph: string }> = {
@@ -83,7 +91,9 @@ const STATUS_GLYPH: Record<Status, { color: (t: Theme) => string; glyph: string
   queued: { color: t => t.color.muted, glyph: '○' },
   completed: { color: t => t.color.statusGood, glyph: '✓' },
   interrupted: { color: t => t.color.warn, glyph: '■' },
-  failed: { color: t => t.color.error, glyph: '✗' }
+  failed: { color: t => t.color.error, glyph: '✗' },
+  timeout: { color: t => t.color.warn, glyph: '⌛' },
+  error: { color: t => t.color.error, glyph: '⚠' }
 }
 
 // Heatmap palette — cold → hot, resolved against the active theme.
@@ -111,7 +121,8 @@ const formatRowId = (n: number): string => String(n + 1).padStart(2, ' ')
 const cycle = <T,>(order: readonly T[], current: T): T => order[(order.indexOf(current) + 1) % order.length]!
 
 const statusGlyph = (item: SubagentProgress, t: Theme) => {
-  const g = STATUS_GLYPH[item.status]
+  // Defensive fallback for cross-version snapshots with unknown statuses.
+  const g = STATUS_GLYPH[item.status] ?? STATUS_GLYPH.error
 
   return { color: g.color(t), glyph: g.glyph }
 }
diff --git a/ui-tui/src/components/thinking.tsx b/ui-tui/src/components/thinking.tsx
index 4204ff56a0f..6908795f621 100644
--- a/ui-tui/src/components/thinking.tsx
+++ b/ui-tui/src/components/thinking.tsx
@@ -327,7 +327,11 @@ function SubagentAccordion({
   const aggregate = node.aggregate
 
   const statusTone: 'dim' | 'error' | 'warn' =
-    item.status === 'failed' ? 'error' : item.status === 'interrupted' ? 'warn' : 'dim'
+    item.status === 'error' || item.status === 'failed'
+      ? 'error'
+      : item.status === 'interrupted' || item.status === 'timeout'
+        ? 'warn'
+        : 'dim'
 
   const prefix = item.taskCount > 1 ? `[${item.index + 1}/${item.taskCount}] ` : ''
   const goalLabel = item.goal || `Subagent ${item.index + 1}`
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index 8c5cb18b23d..ab85c39fbdd 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -1,4 +1,4 @@
-import type { SessionInfo, SlashCategory, Usage } from './types.js'
+import type { SessionInfo, SlashCategory, SubagentStatus, Usage } from './types.js'
 
 export interface GatewaySkin {
   banner_hero?: string
@@ -394,7 +394,7 @@ export interface SubagentEventPayload {
   output_tokens?: number
   parent_id?: null | string
   reasoning_tokens?: number
-  status?: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running'
+  status?: SubagentStatus
   subagent_id?: string
   summary?: string
   task_count?: number
diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts
index 658b9cc13d2..62f580090d2 100644
--- a/ui-tui/src/types.ts
+++ b/ui-tui/src/types.ts
@@ -17,6 +17,8 @@ export interface ActivityItem {
   tone: 'error' | 'info' | 'warn'
 }
 
+export type SubagentStatus = 'completed' | 'error' | 'failed' | 'interrupted' | 'queued' | 'running' | 'timeout'
+
 export interface SubagentProgress {
   apiCalls?: number
   costUsd?: number
@@ -36,7 +38,7 @@ export interface SubagentProgress {
   parentId: null | string
   reasoningTokens?: number
   startedAt?: number
-  status: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running'
+  status: SubagentStatus
   summary?: string
   taskCount: number
   thinking: string[]

From 55c9f32060bbe7eb48bee2b702c157408b468eb2 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Sat, 16 May 2026 06:55:56 +0530
Subject: [PATCH 098/218] fix(tui): width-aware markdown table rendering with
 vertical fallback (#26195)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor(tui): thread cols through Md/StreamingMd/renderTable, update cache key

* feat(tui): three-tier width calc + full-line string rendering in renderTable

Replaces the old renderTable (L203-244) with:
- Empty table guard
- Ragged row normalization
- Three-tier column width calculation (ideal → proportional shrink → hard scale)
- Rounding remainder distribution
- Full-line string rendering (one <Text> per row, not per cell)
- wrap=truncate-end on all table lines
- All cells rendered as plain text via stripInlineMarkup

No wrapping or vertical fallback yet — those come in Phase 3 and 4.

* feat(tui): wrapCell with grapheme-safe hard-break + multi-line row rendering

Adds:
- Intl.Segmenter-based grapheme splitting (fallback to [...word])
- wrapCell() for width-correct word wrapping on stripped text
- Multi-line row rendering with LineEntry metadata (header/separator/body)
- Post-render safety condition (maxLineWidth computed, vertical fallback in Task 4)
- Non-wrapping path preserved for tables that fit at ideal widths

* feat(tui): vertical key-value fallback with scaled threshold + safety check

Wires:
- Scaled row-height threshold (numCols<=3: 8, <=6: 5, else: 4)
- Post-render safety check (maxLineWidth > available space)
- Header-only edge case
- Vertical format: bold headers, stripped cell text, clamped separator width
- Iterates headers (not rows) for consistent key-value fields on ragged rows

* test(tui): pass cols to Md in test helpers, add width-overflow assertions

- renderAtWidth now passes cols={columns} to <Md> so width-aware code paths
  are exercised in tests
- tableFuzz: every rendered line must fit within allocated width (stringWidth)
- tableRepro: separator regex updated to match truncation ellipsis
- stringWidth imported from @hermes/ink for CJK-correct assertions

* fix(tui): address adversarial review — comment tier 3 budget overshoot, eliminate redundant wrapCell

- Add comment on Tier 3 MIN_COL_WIDTH clamp exceeding budget (self-heals via safetyOverflow)
- Track tallestBodyRow during allEntries build pass instead of re-wrapping every cell
  in a second traversal (eliminates O(cells) of redundant stripInlineMarkup+stringWidth)

* fix(tui): pass cols to recursive fenced-markdown Md, fix test frame extraction

- Thread cols into <Md> for fenced markdown blocks (L734) so nested
  tables use the width-aware renderer instead of max-content path
- Fix renderAtWidth helpers to extract final Ink repaint frame instead
  of concatenating all intermediate frames (REPAINT_RE split)
- Add fenced-markdown-table fixture to tableFuzz (exercises the nested path)

* chore: remove repro test suites and tmux driver script

These were scaffolding for development/reproduction — not needed in the PR.
---
 ui-tui/src/components/markdown.tsx          | 325 +++++++++++++++++---
 ui-tui/src/components/messageLine.tsx       |   6 +-
 ui-tui/src/components/streamingMarkdown.tsx |  11 +-
 3 files changed, 295 insertions(+), 47 deletions(-)

diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx
index ae234eb9ec7..c215cd811bf 100644
--- a/ui-tui/src/components/markdown.tsx
+++ b/ui-tui/src/components/markdown.tsx
@@ -200,44 +200,288 @@ export const stripInlineMarkup = (v: string) =>
     .replace(/(?<!\$)\$([^\s$](?:[^$\n]*?[^\s$])?)\$(?!\$)/g, '$1')
     .replace(/\\\(([^\n]+?)\\\)/g, '$1')
 
-const renderTable = (k: number, rows: string[][], t: Theme) => {
-  // Column widths in *display cells*, not UTF-16 code units.  CJK
-  // glyphs and most emoji render as two cells but `String#length`
-  // counts them as one, which collapses Chinese / Japanese / Korean
-  // tables into drift across rows.  `stringWidth` (Bun.stringWidth
-  // fast path + an East-Asian-width-aware fallback, memoised in
-  // @hermes/ink) returns the actual cell count.
-  const cellWidth = (raw: string) => stringWidth(stripInlineMarkup(raw))
+const SAFETY_MARGIN = 4
+const MIN_COL_WIDTH = 3
+const COL_GAP = 2 // the '  ' between columns
+const TABLE_PADDING_LEFT = 2 // paddingLeft={2} on the outer <Box>
 
-  const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => cellWidth(r[ci] ?? ''))))
+const renderTable = (k: number, rows: string[][], t: Theme, cols?: number) => {
+  // Guard: empty table
+  if (rows.length === 0 || rows[0]!.length === 0) return null
 
-  // Thin divider under the header.  Without it tables look like prose
-  // with extra spacing because the header is just accent-coloured text
-  // (#15534).  We avoid full borders on purpose — column widths come
-  // from `stringWidth(...)`, so the dividers and the row content stay
-  // in sync on CJK / emoji tables; tab-style column gaps still read
-  // cleanly without the boxed look.
-  const sep = widths.map(w => '─'.repeat(Math.max(1, w))).join('  ')
+  const cellDisplayWidth = (raw: string) => stringWidth(stripInlineMarkup(raw))
 
-  return (
-    <Box flexDirection="column" key={k} paddingLeft={2}>
-      {rows.map((row, ri) => (
-        <Fragment key={ri}>
-          <Box>
-            {widths.map((w, ci) => (
-              <Text bold={ri === 0} color={ri === 0 ? t.color.accent : undefined} key={ci}>
-                <MdInline t={t} text={row[ci] ?? ''} />
-                {' '.repeat(Math.max(0, w - cellWidth(row[ci] ?? '')))}
-                {ci < widths.length - 1 ? '  ' : ''}
-              </Text>
-            ))}
-          </Box>
-          {ri === 0 && rows.length > 1 ? (
-            <Text color={t.color.muted} dimColor>
-              {sep}
+  // Minimum width: longest word in a cell (to avoid breaking words)
+  const minCellWidth = (raw: string) => {
+    const text = stripInlineMarkup(raw)
+    const words = text.split(/\s+/).filter(w => w.length > 0)
+    if (words.length === 0) return MIN_COL_WIDTH
+    return Math.max(...words.map(w => stringWidth(w)), MIN_COL_WIDTH)
+  }
+
+  const numCols = rows[0]!.length
+
+  // Normalize ragged rows: ensure every row has exactly numCols cells
+  const normalizedRows = rows.map(row => {
+    if (row.length >= numCols) return row.slice(0, numCols)
+    return [...row, ...Array<string>(numCols - row.length).fill('')]
+  })
+
+  // Ideal widths: max cell content per column
+  const idealWidths = normalizedRows[0]!.map((_, ci) =>
+    Math.max(...normalizedRows.map(r => cellDisplayWidth(r[ci] ?? '')), MIN_COL_WIDTH)
+  )
+
+  // Min widths: longest word per column
+  const minWidths = normalizedRows[0]!.map((_, ci) =>
+    Math.max(...normalizedRows.map(r => minCellWidth(r[ci] ?? '')), MIN_COL_WIDTH)
+  )
+
+  // Available width: cols minus table padding minus column gaps minus safety.
+  // transcriptBodyWidth (source of cols) subtracts message gutter + scrollbar,
+  // but NOT this table's paddingLeft — we subtract it here.
+  const gapOverhead = (numCols - 1) * COL_GAP
+  const availableWidth = cols
+    ? Math.max(cols - TABLE_PADDING_LEFT - gapOverhead - SAFETY_MARGIN, numCols * MIN_COL_WIDTH)
+    : Infinity
+
+  const totalIdeal = idealWidths.reduce((a, b) => a + b, 0)
+  const totalMin = minWidths.reduce((a, b) => a + b, 0)
+
+  let columnWidths: number[]
+  let needsWrap = false
+
+  if (totalIdeal <= availableWidth) {
+    // Tier 1: everything fits at ideal widths
+    columnWidths = idealWidths
+  } else if (totalMin <= availableWidth) {
+    // Tier 2: proportional shrink — distribute extra space beyond minimums
+    needsWrap = true
+    const extraSpace = availableWidth - totalMin
+    const overflows = idealWidths.map((ideal, i) => ideal - minWidths[i]!)
+    const totalOverflow = overflows.reduce((a, b) => a + b, 0)
+    if (totalOverflow === 0) {
+      columnWidths = [...minWidths]
+    } else {
+      const rawAlloc = minWidths.map((min, i) =>
+        min + (overflows[i]! / totalOverflow) * extraSpace
+      )
+      columnWidths = rawAlloc.map(v => Math.floor(v))
+      // Distribute rounding remainders to columns with largest fractional part
+      let remainder = availableWidth - columnWidths.reduce((a, b) => a + b, 0)
+      const fracs = rawAlloc.map((v, i) => ({ i, frac: v - Math.floor(v) }))
+        .sort((a, b) => b.frac - a.frac)
+      for (const { i } of fracs) {
+        if (remainder <= 0) break
+        columnWidths[i]!++
+        remainder--
+      }
+    }
+  } else {
+    // Tier 3: even min-widths don't fit — scale proportionally, allow hard breaks.
+    // NOTE: Math.max(..., MIN_COL_WIDTH) can push total above availableWidth when
+    // many columns are scaled below 3. This is caught by safetyOverflow → vertical fallback.
+    needsWrap = true
+    const scaleFactor = availableWidth / totalMin
+    const rawAlloc = minWidths.map(w => w * scaleFactor)
+    columnWidths = rawAlloc.map(v => Math.max(Math.floor(v), MIN_COL_WIDTH))
+    let remainder = availableWidth - columnWidths.reduce((a, b) => a + b, 0)
+    const fracs = rawAlloc.map((v, i) => ({ i, frac: v - Math.floor(v) }))
+      .sort((a, b) => b.frac - a.frac)
+    for (const { i } of fracs) {
+      if (remainder <= 0) break
+      columnWidths[i]!++
+      remainder--
+    }
+  }
+
+  // Grapheme-safe hard-break: prefer Intl.Segmenter, fall back to code-point split
+  const segmenter = typeof Intl !== 'undefined' && 'Segmenter' in Intl
+    ? new (Intl as any).Segmenter(undefined, { granularity: 'grapheme' })
+    : null
+
+  const graphemes = (s: string): string[] =>
+    segmenter
+      ? [...segmenter.segment(s)].map((seg: { segment: string }) => seg.segment)
+      : [...s]
+
+  // Word-wrap plain text to fit within `width` display columns.
+  // Operates on stripped text for correct width measurement.
+  const wrapCell = (raw: string, width: number, hard: boolean): string[] => {
+    const text = stripInlineMarkup(raw)
+    if (width <= 0) return [text]
+    if (stringWidth(text) <= width) return [text]
+
+    const words = text.split(/\s+/).filter(w => w.length > 0)
+    const lines: string[] = []
+    let current = ''
+    let currentWidth = 0
+
+    for (const word of words) {
+      const w = stringWidth(word)
+      if (currentWidth === 0) {
+        if (hard && w > width) {
+          for (const ch of graphemes(word)) {
+            const cw = stringWidth(ch)
+            if (currentWidth + cw > width && current) {
+              lines.push(current)
+              current = ''
+              currentWidth = 0
+            }
+            current += ch
+            currentWidth += cw
+          }
+        } else {
+          current = word
+          currentWidth = w
+        }
+      } else if (currentWidth + 1 + w <= width) {
+        current += ' ' + word
+        currentWidth += 1 + w
+      } else {
+        lines.push(current)
+        current = word
+        currentWidth = w
+      }
+    }
+    if (current) lines.push(current)
+    return lines.length > 0 ? lines : ['']
+  }
+
+  const isHard = totalMin > availableWidth // tier 3 needs hard word breaks
+  const sep = columnWidths.map(w => '─'.repeat(Math.max(1, w))).join('  ')
+
+  // When wrapping isn't needed, build single-line strings per row.
+  // All cells render as plain text via stripInlineMarkup.
+  // TODO: follow-up — format to ANSI then wrap with wrapAnsi for inline markdown preservation.
+  // See free-code/src/components/MarkdownTable.tsx L44-L62 for approach.
+  if (!needsWrap) {
+    const buildRowString = (row: string[]): string =>
+      row.map((cell, ci) => {
+        const text = stripInlineMarkup(cell)
+        const pad = ' '.repeat(Math.max(0, columnWidths[ci]! - stringWidth(text)))
+        const gap = ci < numCols - 1 ? '  ' : ''
+        return text + pad + gap
+      }).join('')
+
+    return (
+      <Box flexDirection="column" key={k} paddingLeft={TABLE_PADDING_LEFT}>
+        {normalizedRows.map((row, ri) => (
+          <Fragment key={ri}>
+            <Text
+              bold={ri === 0}
+              color={ri === 0 ? t.color.accent : undefined}
+              wrap="truncate-end"
+            >
+              {buildRowString(row)}
             </Text>
-          ) : null}
-        </Fragment>
+            {ri === 0 && normalizedRows.length > 1 ? (
+              <Text color={t.color.muted} dimColor wrap="truncate-end">{sep}</Text>
+            ) : null}
+          </Fragment>
+        ))}
+      </Box>
+    )
+  }
+
+  // Wrapping path: build multi-line rows as complete strings.
+  type LineEntry = { text: string; kind: 'header' | 'separator' | 'body' }
+
+  const buildRowLines = (row: string[]): string[] => {
+    const cellLines = row.map((cell, ci) =>
+      wrapCell(cell, columnWidths[ci]!, isHard)
+    )
+    const maxLines = Math.max(...cellLines.map(l => l.length), 1)
+
+    const result: string[] = []
+    for (let li = 0; li < maxLines; li++) {
+      let line = ''
+      for (let ci = 0; ci < numCols; ci++) {
+        const cl = cellLines[ci] ?? ['']
+        const cellText = li < cl.length ? cl[li]! : ''
+        const pad = ' '.repeat(Math.max(0, columnWidths[ci]! - stringWidth(cellText)))
+        line += cellText + pad
+        if (ci < numCols - 1) line += '  '
+      }
+      result.push(line)
+    }
+    return result
+  }
+
+  // Build all lines with metadata for styling, tracking tallest body row
+  const allEntries: LineEntry[] = []
+  let tallestBodyRow = 0
+  normalizedRows.forEach((row, ri) => {
+    const kind = ri === 0 ? 'header' as const : 'body' as const
+    const rowLines = buildRowLines(row)
+    rowLines.forEach(text => allEntries.push({ text, kind }))
+    if (ri > 0) tallestBodyRow = Math.max(tallestBodyRow, rowLines.length)
+    if (ri === 0 && normalizedRows.length > 1) {
+      allEntries.push({ text: sep, kind: 'separator' })
+    }
+  })
+
+  // Post-render safety condition: compute max line width.
+  const maxLineWidth = Math.max(...allEntries.map(e => stringWidth(e.text)))
+  const safetyOverflow = cols != null && maxLineWidth > cols - TABLE_PADDING_LEFT - SAFETY_MARGIN
+
+  // Scaled vertical threshold — 2-3 col tables stay tabular even with tall cells
+  const maxRowLinesThreshold = numCols <= 3 ? 8 : numCols <= 6 ? 5 : 4
+
+  const useVertical = tallestBodyRow > maxRowLinesThreshold || safetyOverflow
+
+  if (useVertical) {
+    // Edge case: header-only table
+    if (normalizedRows.length <= 1) {
+      return (
+        <Box flexDirection="column" key={k} paddingLeft={TABLE_PADDING_LEFT}>
+          <Text bold color={t.color.accent} wrap="wrap-trim">
+            {normalizedRows[0]!.map(h => stripInlineMarkup(h)).join(' · ')}
+          </Text>
+        </Box>
+      )
+    }
+
+    const headers = normalizedRows[0]!
+    const dataRows = normalizedRows.slice(1)
+    const sepWidth = Math.max(1, cols ? Math.min(cols - TABLE_PADDING_LEFT - 1, 40) : 40)
+
+    return (
+      <Box flexDirection="column" key={k} paddingLeft={TABLE_PADDING_LEFT}>
+        {dataRows.map((row, ri) => (
+          <Fragment key={ri}>
+            {ri > 0 ? (
+              <Text color={t.color.muted} dimColor>{'─'.repeat(sepWidth)}</Text>
+            ) : null}
+            {headers.map((header, ci) => {
+              const cell = row[ci] ?? ''
+              const label = stripInlineMarkup(header) || `Col ${ci + 1}`
+              return (
+                <Text key={ci} wrap="wrap-trim">
+                  <Text bold color={t.color.accent}>{label}:</Text>
+                  {' '}{stripInlineMarkup(cell)}
+                </Text>
+              )
+            })}
+          </Fragment>
+        ))}
+      </Box>
+    )
+  }
+
+  // Render wrapped horizontal rows — one <Text> per visual line.
+  return (
+    <Box flexDirection="column" key={k} paddingLeft={TABLE_PADDING_LEFT}>
+      {allEntries.map((entry, i) => (
+        <Text
+          bold={entry.kind === 'header'}
+          color={entry.kind === 'header' ? t.color.accent : entry.kind === 'separator' ? t.color.muted : undefined}
+          dimColor={entry.kind === 'separator'}
+          key={i}
+          wrap="truncate-end"
+        >
+          {entry.text}
+        </Text>
       ))}
     </Box>
   )
@@ -395,10 +639,10 @@ const cacheSet = (b: Map<string, ReactNode[]>, key: string, v: ReactNode[]) => {
   }
 }
 
-function MdImpl({ compact, t, text }: MdProps) {
+function MdImpl({ cols, compact, t, text }: MdProps) {
   const nodes = useMemo(() => {
     const bucket = cacheBucket(t)
-    const cacheKey = `${compact ? '1' : '0'}|${text}`
+    const cacheKey = `${compact ? '1' : '0'}|${cols ?? ''}|${text}`
     const cached = cacheGet(bucket, cacheKey)
 
     if (cached) {
@@ -490,7 +734,7 @@ function MdImpl({ compact, t, text }: MdProps) {
 
         if (['md', 'markdown'].includes(lang)) {
           start('paragraph')
-          nodes.push(<Md compact={compact} key={key} t={t} text={block.join('\n')} />)
+          nodes.push(<Md cols={cols} compact={compact} key={key} t={t} text={block.join('\n')} />)
 
           continue
         }
@@ -785,7 +1029,7 @@ function MdImpl({ compact, t, text }: MdProps) {
           rows.push(splitRow(lines[i]!))
         }
 
-        nodes.push(renderTable(key, rows, t))
+        nodes.push(renderTable(key, rows, t, cols))
 
         continue
       }
@@ -838,7 +1082,7 @@ function MdImpl({ compact, t, text }: MdProps) {
         }
 
         if (rows.length) {
-          nodes.push(renderTable(key, rows, t))
+          nodes.push(renderTable(key, rows, t, cols))
         }
 
         continue
@@ -852,7 +1096,7 @@ function MdImpl({ compact, t, text }: MdProps) {
     cacheSet(bucket, cacheKey, nodes)
 
     return nodes
-  }, [compact, t, text])
+  }, [cols, compact, t, text])
 
   return <Box flexDirection="column">{nodes}</Box>
 }
@@ -862,6 +1106,7 @@ export const Md = memo(MdImpl)
 type Kind = 'blank' | 'code' | 'heading' | 'list' | 'paragraph' | 'quote' | 'rule' | 'table' | null
 
 interface MdProps {
+  cols?: number
   compact?: boolean
   t: Theme
   text: string
diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx
index 950b61b4d72..238b551ae97 100644
--- a/ui-tui/src/components/messageLine.tsx
+++ b/ui-tui/src/components/messageLine.tsx
@@ -139,13 +139,15 @@ export const MessageLine = memo(function MessageLine({
     }
 
     if (msg.role === 'assistant') {
+      const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt)
+
       return isStreaming ? (
         // Incremental markdown: split at the last stable block boundary so
         // only the in-flight tail re-tokenizes per delta. See
         // streamingMarkdown.tsx for the cost model.
-        <StreamingMd compact={compact} t={t} text={boundedLiveRenderText(msg.text)} />
+        <StreamingMd cols={bodyWidth} compact={compact} t={t} text={boundedLiveRenderText(msg.text)} />
       ) : (
-        <Md compact={compact} t={t} text={limitHistoryRender ? boundedHistoryRenderText(msg.text) : msg.text} />
+        <Md cols={bodyWidth} compact={compact} t={t} text={limitHistoryRender ? boundedHistoryRenderText(msg.text) : msg.text} />
       )
     }
 
diff --git a/ui-tui/src/components/streamingMarkdown.tsx b/ui-tui/src/components/streamingMarkdown.tsx
index 1be70b283a8..786a3812461 100644
--- a/ui-tui/src/components/streamingMarkdown.tsx
+++ b/ui-tui/src/components/streamingMarkdown.tsx
@@ -128,7 +128,7 @@ export const findStableBoundary = (text: string) => {
   return -1
 }
 
-export const StreamingMd = memo(function StreamingMd({ compact, t, text }: StreamingMdProps) {
+export const StreamingMd = memo(function StreamingMd({ cols, compact, t, text }: StreamingMdProps) {
   const stablePrefixRef = useRef('')
 
   // Reset if the text no longer starts with our recorded prefix (defensive;
@@ -151,22 +151,23 @@ export const StreamingMd = memo(function StreamingMd({ compact, t, text }: Strea
   const unstableSuffix = text.slice(stablePrefix.length)
 
   if (!stablePrefix) {
-    return <Md compact={compact} t={t} text={unstableSuffix} />
+    return <Md cols={cols} compact={compact} t={t} text={unstableSuffix} />
   }
 
   if (!unstableSuffix) {
-    return <Md compact={compact} t={t} text={stablePrefix} />
+    return <Md cols={cols} compact={compact} t={t} text={stablePrefix} />
   }
 
   return (
     <Box flexDirection="column">
-      <Md compact={compact} t={t} text={stablePrefix} />
-      <Md compact={compact} t={t} text={unstableSuffix} />
+      <Md cols={cols} compact={compact} t={t} text={stablePrefix} />
+      <Md cols={cols} compact={compact} t={t} text={unstableSuffix} />
     </Box>
   )
 })
 
 interface StreamingMdProps {
+  cols?: number
   compact?: boolean
   t: Theme
   text: string

From 86a368d8322b3977bf89b9043818eebc6adf470b Mon Sep 17 00:00:00 2001
From: emozilla <emozilla@nousresearch.com>
Date: Fri, 15 May 2026 22:14:41 -0400
Subject: [PATCH 099/218] remove pip installation method from docs

---
 website/docs/getting-started/installation.md | 25 --------------------
 1 file changed, 25 deletions(-)

diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index a88f4c8bd1c..10420d8df55 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -10,31 +10,6 @@ Get Hermes Agent up and running in under two minutes with the one-line installer
 
 ## Quick Install
 
-### pip (recommended for most users)
-
-```bash
-pip install hermes-agent
-```
-
-This gives you the full Hermes Agent — CLI, web dashboard, and TUI — with zero external dependencies for core usage. Node.js, browser engines, and other optional tools are bootstrapped lazily on first use (e.g. when you run `hermes --tui` or use browser tools).
-
-PyPI releases track **tagged versions** (major and minor releases), not every commit on `main`. If you want bleeding-edge changes as they land, use the git install below.
-
-After installing, run:
-
-```bash
-hermes setup   # interactive wizard — configures your LLM provider and API key
-hermes         # start chatting
-```
-
-:::tip Optional: install everything upfront
-`hermes postinstall` installs Node.js, browser engines, ripgrep, and ffmpeg in one shot — then runs the setup wizard. Use this if you want the full experience (TUI, browser tools, voice) without waiting for lazy installs on first use.
-:::
-
-:::tip
-If you have [uv](https://docs.astral.sh/uv/) installed, `uv pip install hermes-agent` is faster.
-:::
-
 ### One-Line Installer (Linux / macOS / WSL2)
 
 For a git-based install that tracks `main` and gives you the latest changes immediately:

From 63503ebb14069e8ba0bea91955e7ce4e01670a4e Mon Sep 17 00:00:00 2001
From: Austin Pickett <pickett.austin@gmail.com>
Date: Fri, 15 May 2026 22:40:21 -0400
Subject: [PATCH 100/218] fix(dashboard): clarify Kanban Ready vs assignment

Ready column help and fallbacks now describe dependency-ready work; show a
badge on unassigned ready cards and fix the stale unassigned tooltip. Align
localized Ready help strings with the new semantics.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 plugins/kanban/dashboard/dist/index.js  | 14 ++++++++++++--
 plugins/kanban/dashboard/dist/style.css |  8 ++++++++
 web/src/i18n/af.ts                      |  2 +-
 web/src/i18n/de.ts                      |  2 +-
 web/src/i18n/en.ts                      |  5 ++++-
 web/src/i18n/es.ts                      |  2 +-
 web/src/i18n/fr.ts                      |  2 +-
 web/src/i18n/ga.ts                      |  2 +-
 web/src/i18n/hu.ts                      |  2 +-
 web/src/i18n/it.ts                      |  2 +-
 web/src/i18n/ja.ts                      |  2 +-
 web/src/i18n/ko.ts                      |  2 +-
 web/src/i18n/pt.ts                      |  2 +-
 web/src/i18n/ru.ts                      |  2 +-
 web/src/i18n/tr.ts                      |  2 +-
 web/src/i18n/types.ts                   |  2 ++
 web/src/i18n/zh-hant.ts                 |  2 +-
 web/src/i18n/zh.ts                      |  2 +-
 18 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js
index 720cdb9e1e2..6f05df72bf6 100644
--- a/plugins/kanban/dashboard/dist/index.js
+++ b/plugins/kanban/dashboard/dist/index.js
@@ -68,7 +68,7 @@
   const FALLBACK_COLUMN_HELP = {
     triage: "Raw ideas — a specifier will flesh out the spec",
     todo: "Waiting on dependencies or unassigned",
-    ready: "Assigned and waiting for a dispatcher tick",
+    ready: "Dependencies satisfied; assign a profile to dispatch",
     running: "Claimed by a worker — in-flight",
     blocked: "Worker asked for human input",
     done: "Completed",
@@ -2048,6 +2048,7 @@
     };
 
     const progress = t.progress;
+    const needsAssignee = t.status === "ready" && !t.assignee;
 
     return h("div", {
       ref: cardRef,
@@ -2118,6 +2119,13 @@
                   title: `${progress.done} of ${progress.total} child tasks done`,
                 }, `${progress.done}/${progress.total}`)
               : null,
+            needsAssignee
+              ? h(Badge, {
+                  variant: "outline",
+                  className: "hermes-kanban-needs-assignee",
+                  title: tx(i18n, "needsAssigneeHint", "Dependencies are satisfied, but the dispatcher skips this task until you assign a profile."),
+                }, tx(i18n, "needsAssignee", "Needs assignee"))
+              : null,
           ),
           h("div", { className: "hermes-kanban-card-title" },
             t.title || tx(i18n, "untitled", "(untitled)")),
@@ -2126,7 +2134,9 @@
               ? h("span", { className: "hermes-kanban-assignee",
                             title: `Assigned to Hermes profile @${t.assignee}` }, "@", t.assignee)
               : h("span", { className: "hermes-kanban-unassigned",
-                            title: "No profile assigned. The dispatcher will pick one from available profiles when the task is Ready." },
+                            title: needsAssignee
+                              ? tx(i18n, "needsAssigneeHint", "Dependencies are satisfied, but the dispatcher skips this task until you assign a profile.")
+                              : "No profile assigned." },
                   tx(i18n, "unassigned", "unassigned")),
             t.comment_count > 0
               ? h("span", { className: "hermes-kanban-count",
diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css
index 3bcfccb289b..f3d66a88597 100644
--- a/plugins/kanban/dashboard/dist/style.css
+++ b/plugins/kanban/dashboard/dist/style.css
@@ -280,6 +280,14 @@
   padding: 0.05rem 0.3rem !important;
 }
 
+.hermes-kanban-needs-assignee {
+  font-size: 0.6rem !important;
+  padding: 0.05rem 0.3rem !important;
+  background: color-mix(in srgb, var(--color-warning, #d4b348) 16%, transparent);
+  border-color: color-mix(in srgb, var(--color-warning, #d4b348) 45%, var(--color-border));
+  color: var(--color-foreground);
+}
+
 .hermes-kanban-assignee {
   font-weight: 500;
   color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground));
diff --git a/web/src/i18n/af.ts b/web/src/i18n/af.ts
index 4f49eb12227..e588a63596d 100644
--- a/web/src/i18n/af.ts
+++ b/web/src/i18n/af.ts
@@ -663,7 +663,7 @@ export const af: Translations = {
     columnHelp: {
       triage: "Rou idees — 'n spesifiseerder sal die spesifikasie uitwerk",
       todo: "Wag op afhanklikhede of nie toegewys nie",
-      ready: "Toegewys en wag vir 'n versender-tik",
+      ready: "Afhanklikhede is bevredig; wys 'n profiel toe om te versend",
       running: "Deur 'n werker geëis — in vlug",
       blocked: "Werker het mensinvoer aangevra",
       done: "Voltooi",
diff --git a/web/src/i18n/de.ts b/web/src/i18n/de.ts
index c70ccfe8701..28a9b59deff 100644
--- a/web/src/i18n/de.ts
+++ b/web/src/i18n/de.ts
@@ -662,7 +662,7 @@ export const de: Translations = {
     columnHelp: {
       triage: "Rohe Ideen — ein Specifier wird die Spezifikation ausarbeiten",
       todo: "Wartet auf Abhängigkeiten oder ist nicht zugewiesen",
-      ready: "Zugewiesen und wartet auf einen Dispatcher-Tick",
+      ready: "Abhängigkeiten erfüllt; Profil zum Dispatch zuweisen",
       running: "Von einem Worker übernommen — in Bearbeitung",
       blocked: "Worker hat um menschliche Eingabe gebeten",
       done: "Abgeschlossen",
diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts
index e93fdac7ec4..5eae3f9a14a 100644
--- a/web/src/i18n/en.ts
+++ b/web/src/i18n/en.ts
@@ -574,6 +574,9 @@ export const en: Translations = {
     createTask: "Create task in this column",
     noTasks: "— no tasks —",
     unassigned: "unassigned",
+    needsAssignee: "Needs assignee",
+    needsAssigneeHint:
+      "Dependencies are satisfied, but the dispatcher skips this task until you assign a profile.",
     untitled: "(untitled)",
     loadingDetail: "Loading…",
     addComment: "Add a comment… (Enter to submit)",
@@ -664,7 +667,7 @@ export const en: Translations = {
     columnHelp: {
       triage: "Raw ideas — a specifier will flesh out the spec",
       todo: "Waiting on dependencies or unassigned",
-      ready: "Assigned and waiting for a dispatcher tick",
+      ready: "Dependencies satisfied; assign a profile to dispatch",
       running: "Claimed by a worker — in-flight",
       blocked: "Worker asked for human input",
       done: "Completed",
diff --git a/web/src/i18n/es.ts b/web/src/i18n/es.ts
index 19088de12c8..139a8175d44 100644
--- a/web/src/i18n/es.ts
+++ b/web/src/i18n/es.ts
@@ -662,7 +662,7 @@ export const es: Translations = {
     columnHelp: {
       triage: "Ideas en bruto — un specifier desarrollará la especificación",
       todo: "Esperando dependencias o sin asignar",
-      ready: "Asignado y esperando un tick del dispatcher",
+      ready: "Dependencias satisfechas; asigna un perfil para despachar",
       running: "Reclamado por un worker — en ejecución",
       blocked: "El worker pidió intervención humana",
       done: "Completado",
diff --git a/web/src/i18n/fr.ts b/web/src/i18n/fr.ts
index 4532cab3ee0..51b5ba54f12 100644
--- a/web/src/i18n/fr.ts
+++ b/web/src/i18n/fr.ts
@@ -662,7 +662,7 @@ export const fr: Translations = {
     columnHelp: {
       triage: "Idées brutes — un specifier rédigera la spécification",
       todo: "En attente de dépendances ou non assigné",
-      ready: "Assigné et en attente d'un tick du dispatcher",
+      ready: "Dépendances satisfaites ; assignez un profil pour dispatch",
       running: "Réclamé par un worker — en cours d'exécution",
       blocked: "Le worker a demandé une intervention humaine",
       done: "Terminé",
diff --git a/web/src/i18n/ga.ts b/web/src/i18n/ga.ts
index d75ec061b8b..4dc4e823430 100644
--- a/web/src/i18n/ga.ts
+++ b/web/src/i18n/ga.ts
@@ -663,7 +663,7 @@ export const ga: Translations = {
     columnHelp: {
       triage: "Smaointe amha — déanfaidh specifier an spec a chur i bhfeidhm",
       todo: "Ag fanacht ar spleáchais nó gan sannadh",
-      ready: "Sannta agus ag fanacht ar thic an dispatcher",
+      ready: "Tá na spleáchais sásaithe; sann próifíl le dispatch a dhéanamh",
       running: "Éilithe ag worker — ar siúl",
       blocked: "D'iarr an worker ionchur duine",
       done: "Críochnaithe",
diff --git a/web/src/i18n/hu.ts b/web/src/i18n/hu.ts
index f563c1dacc4..8b492f3bb16 100644
--- a/web/src/i18n/hu.ts
+++ b/web/src/i18n/hu.ts
@@ -663,7 +663,7 @@ export const hu: Translations = {
     columnHelp: {
       triage: "Nyers ötletek — egy specifier kidolgozza a specifikációt",
       todo: "Függőségekre vár vagy nincs felelőse",
-      ready: "Kiosztva, dispatcher tickre vár",
+      ready: "A függőségek teljesültek; rendelj hozzá profilt az indításhoz",
       running: "Worker felvette — folyamatban",
       blocked: "A worker emberi beavatkozást kért",
       done: "Befejezve",
diff --git a/web/src/i18n/it.ts b/web/src/i18n/it.ts
index 5e79d3115c3..86fce86589e 100644
--- a/web/src/i18n/it.ts
+++ b/web/src/i18n/it.ts
@@ -662,7 +662,7 @@ export const it: Translations = {
     columnHelp: {
       triage: "Idee grezze — un specifier elaborerà la specifica",
       todo: "In attesa di dipendenze o non assegnato",
-      ready: "Assegnato e in attesa di un tick del dispatcher",
+      ready: "Dipendenze soddisfatte; assegna un profilo per il dispatch",
       running: "Preso in carico da un worker — in esecuzione",
       blocked: "Il worker ha richiesto input umano",
       done: "Completato",
diff --git a/web/src/i18n/ja.ts b/web/src/i18n/ja.ts
index 175468e4d8b..154e11f5dbb 100644
--- a/web/src/i18n/ja.ts
+++ b/web/src/i18n/ja.ts
@@ -663,7 +663,7 @@ export const ja: Translations = {
     columnHelp: {
       triage: "未整理のアイデア — スペシファイアが仕様を肉付けします",
       todo: "依存関係の待機中、または未割り当て",
-      ready: "割り当て済み、ディスパッチャーのティック待ち",
+      ready: "依存関係は満たされています。ディスパッチするにはプロファイルを割り当ててください",
       running: "ワーカーが取得中 — 実行中",
       blocked: "ワーカーが人間の入力を求めています",
       done: "完了",
diff --git a/web/src/i18n/ko.ts b/web/src/i18n/ko.ts
index cfc40d63df7..4dafaeb9cde 100644
--- a/web/src/i18n/ko.ts
+++ b/web/src/i18n/ko.ts
@@ -663,7 +663,7 @@ export const ko: Translations = {
     columnHelp: {
       triage: "원시 아이디어 — 스페시파이어가 사양을 구체화합니다",
       todo: "종속성 대기 중 또는 미지정",
-      ready: "지정되었으며 디스패처 틱 대기 중",
+      ready: "종속성이 충족됨; 디스패치하려면 프로필을 지정하세요",
       running: "워커가 점유 중 — 실행 중",
       blocked: "워커가 사람의 입력을 요청함",
       done: "완료됨",
diff --git a/web/src/i18n/pt.ts b/web/src/i18n/pt.ts
index 6cdd40b8fe5..d32402dc92a 100644
--- a/web/src/i18n/pt.ts
+++ b/web/src/i18n/pt.ts
@@ -663,7 +663,7 @@ export const pt: Translations = {
     columnHelp: {
       triage: "Ideias em bruto — um specifier vai detalhar a especificação",
       todo: "À espera de dependências ou sem atribuição",
-      ready: "Atribuído e à espera de um tick do dispatcher",
+      ready: "Dependências satisfeitas; atribua um perfil para despachar",
       running: "Reivindicado por um worker — em execução",
       blocked: "O worker pediu intervenção humana",
       done: "Concluído",
diff --git a/web/src/i18n/ru.ts b/web/src/i18n/ru.ts
index c5b9a5b5038..79a6961b251 100644
--- a/web/src/i18n/ru.ts
+++ b/web/src/i18n/ru.ts
@@ -663,7 +663,7 @@ export const ru: Translations = {
     columnHelp: {
       triage: "Сырые идеи — specifier подготовит спецификацию",
       todo: "Ожидает зависимостей или без исполнителя",
-      ready: "Назначено и ждёт тика диспетчера",
+      ready: "Зависимости выполнены; назначьте профиль для диспетчеризации",
       running: "Взято воркером — выполняется",
       blocked: "Воркер запросил вмешательство человека",
       done: "Завершено",
diff --git a/web/src/i18n/tr.ts b/web/src/i18n/tr.ts
index 7de6ea1df7d..56670424abb 100644
--- a/web/src/i18n/tr.ts
+++ b/web/src/i18n/tr.ts
@@ -663,7 +663,7 @@ export const tr: Translations = {
     columnHelp: {
       triage: "Ham fikirler — bir specifier şartnameyi detaylandıracak",
       todo: "Bağımlılıklar bekleniyor veya atanmamış",
-      ready: "Atanmış ve dispatcher tick'i bekleniyor",
+      ready: "Bağımlılıklar karşılandı; dispatch için bir profil atayın",
       running: "Bir worker tarafından alındı — yürütülüyor",
       blocked: "Worker insan girdisi istedi",
       done: "Tamamlandı",
diff --git a/web/src/i18n/types.ts b/web/src/i18n/types.ts
index ca40b4a381f..55669a4b679 100644
--- a/web/src/i18n/types.ts
+++ b/web/src/i18n/types.ts
@@ -586,6 +586,8 @@ export interface Translations {
     createTask: string;
     noTasks: string;
     unassigned: string;
+    needsAssignee?: string;
+    needsAssigneeHint?: string;
     untitled: string;
     loadingDetail: string;
     addComment: string;
diff --git a/web/src/i18n/zh-hant.ts b/web/src/i18n/zh-hant.ts
index c79222cfe91..27f3a41b95f 100644
--- a/web/src/i18n/zh-hant.ts
+++ b/web/src/i18n/zh-hant.ts
@@ -663,7 +663,7 @@ export const zhHant: Translations = {
     columnHelp: {
       triage: "原始想法 — 規格制定者將完善規格",
       todo: "等待相依項目或尚未指派",
-      ready: "已指派，等待排程器輪詢",
+      ready: "相依項目已滿足；指派設定檔以便排程",
       running: "已被工作者領取 — 執行中",
       blocked: "工作者請求人工輸入",
       done: "已完成",
diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts
index 0a8ceb7962a..6290c473b82 100644
--- a/web/src/i18n/zh.ts
+++ b/web/src/i18n/zh.ts
@@ -659,7 +659,7 @@ export const zh: Translations = {
     columnHelp: {
       triage: "原始想法 — 规范制定者将完善规格",
       todo: "等待依赖项或未分配",
-      ready: "已分配，等待调度器轮询",
+      ready: "依赖项已满足；分配一个配置文件以便调度",
       running: "已被工作者认领 — 执行中",
       blocked: "工作者请求人工输入",
       done: "已完成",

From 97a32afdc490e3d40b291dac0e67f291502052a0 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Fri, 15 May 2026 18:43:39 -0600
Subject: [PATCH 101/218] fix(auxiliary): resolve xai oauth compression from
 pool

---
 agent/auxiliary_client.py            | 38 +++++++++++++--
 run_agent.py                         | 19 ++++++--
 tests/agent/test_auxiliary_client.py | 72 ++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 1c7dd9f7497..cfc44e5f2a6 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1272,12 +1272,40 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
 def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]:
     """Resolve a fresh xAI OAuth (api_key, base_url) for auxiliary clients.
 
-    Routes through ``hermes_cli.auth``'s runtime resolver so the auto-refresh
-    path is shared with the main agent, instead of relying on whatever raw
-    tokens happen to be sitting in auth.json or the credential pool.  Returns
-    ``None`` if the user is not authenticated with xAI Grok OAuth (so
-    ``_resolve_auto`` Step 1 falls through to the next provider in the chain).
+    Prefer the credential pool, matching the main runtime/provider status
+    path.  Some xAI OAuth logins live only as pool entries; falling straight
+    to the singleton auth-store resolver would make auxiliary tasks such as
+    compression report "no provider configured" even though ``hermes auth
+    status`` shows xAI OAuth as logged in.
+
+    Falls back to ``hermes_cli.auth``'s singleton runtime resolver for older
+    auth-store-only logins. Returns ``None`` if the user is not authenticated
+    with xAI Grok OAuth.
     """
+    try:
+        from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
+
+        pool = load_pool("xai-oauth")
+        if pool and pool.has_credentials():
+            entry = pool.select()
+            if entry is not None:
+                api_key = str(
+                    getattr(entry, "runtime_api_key", None)
+                    or getattr(entry, "access_token", "")
+                    or ""
+                ).strip()
+                base_url = str(
+                    os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
+                    or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
+                    or getattr(entry, "runtime_base_url", None)
+                    or getattr(entry, "base_url", None)
+                    or DEFAULT_XAI_OAUTH_BASE_URL
+                ).strip().rstrip("/")
+                if api_key and base_url:
+                    return api_key, base_url
+    except Exception as exc:
+        logger.debug("Auxiliary xAI OAuth pool credential resolution failed: %s", exc)
+
     try:
         from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
 
diff --git a/run_agent.py b/run_agent.py
index 85c1128d68e..b3cde9eb1ea 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3237,11 +3237,20 @@ class AIAgent:
             except Exception:
                 _aux_cfg_provider = ""
             if client is None or not aux_model:
-                msg = (
-                    "⚠ No auxiliary LLM provider configured — context "
-                    "compression will drop middle turns without a summary. "
-                    "Run `hermes setup` or set OPENROUTER_API_KEY."
-                )
+                if _aux_cfg_provider and _aux_cfg_provider != "auto":
+                    msg = (
+                        "⚠ Configured auxiliary compression provider "
+                        f"'{_aux_cfg_provider}' is unavailable — context "
+                        "compression will drop middle turns without a summary. "
+                        "Check auxiliary.compression in config.yaml and "
+                        "reauthenticate that provider."
+                    )
+                else:
+                    msg = (
+                        "⚠ No auxiliary LLM provider configured — context "
+                        "compression will drop middle turns without a summary. "
+                        "Run `hermes setup` or set OPENROUTER_API_KEY."
+                    )
                 self._compression_warning = msg
                 self._emit_status(msg)
                 logger.warning(
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 9dd85762956..96f5802f839 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -26,6 +26,7 @@ from agent.auxiliary_client import (
     _normalize_aux_provider,
     _try_payment_fallback,
     _resolve_auto,
+    _resolve_xai_oauth_for_aux,
     _CodexCompletionsAdapter,
 )
 
@@ -221,6 +222,77 @@ class TestReadCodexAccessToken:
         assert result == "plain-token-no-jwt"
 
 
+class TestResolveXaiOAuthForAux:
+    def test_uses_pool_backed_credentials_without_singleton(self, tmp_path, monkeypatch):
+        """Auxiliary xAI OAuth must see pool-only credentials.
+
+        ``hermes auth status`` already reports these as logged in; compression
+        should not fall through to "no auxiliary provider configured" just
+        because the singleton auth-store entry is absent.
+        """
+        from agent.credential_pool import AUTH_TYPE_OAUTH, PooledCredential, load_pool
+        from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
+
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        (hermes_home / "auth.json").write_text(json.dumps({
+            "version": 1,
+            "providers": {},
+        }))
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("HERMES_XAI_BASE_URL", raising=False)
+        monkeypatch.delenv("XAI_BASE_URL", raising=False)
+
+        pool = load_pool("xai-oauth")
+        pool.add_entry(PooledCredential(
+            provider="xai-oauth",
+            id="xai123",
+            label="pool-only",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source="manual:xai_pkce",
+            access_token="pool-access-token",
+            refresh_token="pool-refresh-token",
+            base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+        ))
+
+        assert _resolve_xai_oauth_for_aux() == (
+            "pool-access-token",
+            DEFAULT_XAI_OAUTH_BASE_URL,
+        )
+
+    def test_pool_backed_credentials_honor_base_url_env_override(self, tmp_path, monkeypatch):
+        from agent.credential_pool import AUTH_TYPE_OAUTH, PooledCredential, load_pool
+        from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
+
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        (hermes_home / "auth.json").write_text(json.dumps({
+            "version": 1,
+            "providers": {},
+        }))
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setenv("HERMES_XAI_BASE_URL", "https://example.x.ai/v1/")
+
+        pool = load_pool("xai-oauth")
+        pool.add_entry(PooledCredential(
+            provider="xai-oauth",
+            id="xai456",
+            label="pool-only",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source="manual:xai_pkce",
+            access_token="pool-access-token",
+            refresh_token="pool-refresh-token",
+            base_url=DEFAULT_XAI_OAUTH_BASE_URL,
+        ))
+
+        assert _resolve_xai_oauth_for_aux() == (
+            "pool-access-token",
+            "https://example.x.ai/v1",
+        )
+
+
 class TestAnthropicOAuthFlag:
     """Test that OAuth tokens get is_oauth=True in auxiliary Anthropic client."""
 

From 44b63fc6de3fe2b53eac3109b4a20db41c663195 Mon Sep 17 00:00:00 2001
From: brooklyn! <brooklyn.bb.nicholson@gmail.com>
Date: Fri, 15 May 2026 21:59:28 -0500
Subject: [PATCH 102/218] fix(tui): allow transcript scroll + Esc during
 approval/clarify/confirm prompts (#26414)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When an approval / clarify / confirm overlay was active, the global input
handler in useInputHandlers returned for every key that wasn't Ctrl+C, which
silently disabled transcript scrolling. On long threads the context the
prompt was asking about often lived above the visible viewport, and being
unable to scroll while answering felt like the prompt had locked the UI.
ApprovalPrompt also had no Esc handler at all, so the one obvious 'abort'
key did nothing during a permission prompt and the user had to memorize
Ctrl+C or hunt for the deny number.

Fixes:

- Extract shouldFallThroughForScroll(key) (pure, exported) covering wheel
  scrolls, PageUp/PageDown, and Shift+ArrowUp/Down. When a prompt overlay
  is up and the pressed key is a scroll input, skip the early return so it
  reaches the existing wheel/PageUp/Shift+arrow handlers below. Plain
  arrows still drive in-prompt selection — they don't fall through.
- ApprovalPrompt now maps Esc to onChoice('deny'), parity with the global
  Ctrl+C cancellation path that already invokes cancelOverlayFromCtrlC()
  for approvals. The bottom-of-prompt hint now advertises 'Esc/Ctrl+C deny'.
- Extract approvalAction(ch, key, sel) — pure key-dispatch helper for the
  approval prompt, exported so the regression matrix (Esc, numbers, Enter,
  arrows, edge clamping, precedence) is testable without mounting Ink.

Tests:
- useInputHandlers.test.ts: 6 cases covering shouldFallThroughForScroll
  positives (wheel/PageUp/PageDown/Shift+arrows) and negatives (plain
  arrows, bare shift, no scroll key).
- approvalAction.test.ts: 8 cases covering Esc→deny, numeric mapping,
  Enter, ↑↓ within bounds, edge clamping, Esc-beats-others precedence,
  unrelated keystrokes.
---
 ui-tui/src/__tests__/approvalAction.test.ts   | 50 +++++++++++++
 ui-tui/src/__tests__/useInputHandlers.test.ts | 42 ++++++++++-
 ui-tui/src/app/useInputHandlers.ts            | 57 ++++++++++++++-
 ui-tui/src/components/prompts.tsx             | 73 ++++++++++++++-----
 4 files changed, 201 insertions(+), 21 deletions(-)
 create mode 100644 ui-tui/src/__tests__/approvalAction.test.ts

diff --git a/ui-tui/src/__tests__/approvalAction.test.ts b/ui-tui/src/__tests__/approvalAction.test.ts
new file mode 100644
index 00000000000..851b5093448
--- /dev/null
+++ b/ui-tui/src/__tests__/approvalAction.test.ts
@@ -0,0 +1,50 @@
+import { describe, expect, it } from 'vitest'
+
+import { approvalAction } from '../components/prompts.js'
+
+describe('approvalAction — pure key dispatch for ApprovalPrompt', () => {
+  it('maps Esc to deny — parity with global Ctrl+C cancellation', () => {
+    expect(approvalAction('', { escape: true }, 0)).toEqual({ kind: 'choose', choice: 'deny' })
+    expect(approvalAction('', { escape: true }, 2)).toEqual({ kind: 'choose', choice: 'deny' })
+  })
+
+  it('maps number keys 1..4 to once/session/always/deny in registration order', () => {
+    expect(approvalAction('1', {}, 0)).toEqual({ kind: 'choose', choice: 'once' })
+    expect(approvalAction('2', {}, 0)).toEqual({ kind: 'choose', choice: 'session' })
+    expect(approvalAction('3', {}, 0)).toEqual({ kind: 'choose', choice: 'always' })
+    expect(approvalAction('4', {}, 0)).toEqual({ kind: 'choose', choice: 'deny' })
+  })
+
+  it('ignores out-of-range numbers', () => {
+    expect(approvalAction('0', {}, 1)).toEqual({ kind: 'noop' })
+    expect(approvalAction('5', {}, 1)).toEqual({ kind: 'noop' })
+    expect(approvalAction('9', {}, 1)).toEqual({ kind: 'noop' })
+  })
+
+  it('confirms the current selection on Enter', () => {
+    expect(approvalAction('', { return: true }, 0)).toEqual({ kind: 'choose', choice: 'once' })
+    expect(approvalAction('', { return: true }, 3)).toEqual({ kind: 'choose', choice: 'deny' })
+  })
+
+  it('moves selection up/down within bounds', () => {
+    expect(approvalAction('', { upArrow: true }, 2)).toEqual({ kind: 'move', delta: -1 })
+    expect(approvalAction('', { downArrow: true }, 1)).toEqual({ kind: 'move', delta: 1 })
+  })
+
+  it('clamps selection movement at the edges', () => {
+    expect(approvalAction('', { upArrow: true }, 0)).toEqual({ kind: 'noop' })
+    expect(approvalAction('', { downArrow: true }, 3)).toEqual({ kind: 'noop' })
+  })
+
+  it('Esc beats numeric/return — denying is always the first interpretation', () => {
+    // If a terminal somehow delivers Esc + a digit in the same event, deny
+    // wins.  Documents the precedence so a future refactor doesn't flip it.
+    expect(approvalAction('1', { escape: true }, 0)).toEqual({ kind: 'choose', choice: 'deny' })
+    expect(approvalAction('', { escape: true, return: true }, 1)).toEqual({ kind: 'choose', choice: 'deny' })
+  })
+
+  it('returns noop for unrelated keystrokes (printable letters etc.)', () => {
+    expect(approvalAction('a', {}, 0)).toEqual({ kind: 'noop' })
+    expect(approvalAction(' ', {}, 0)).toEqual({ kind: 'noop' })
+  })
+})
diff --git a/ui-tui/src/__tests__/useInputHandlers.test.ts b/ui-tui/src/__tests__/useInputHandlers.test.ts
index 066292abfa5..0d3fd69c1ed 100644
--- a/ui-tui/src/__tests__/useInputHandlers.test.ts
+++ b/ui-tui/src/__tests__/useInputHandlers.test.ts
@@ -1,6 +1,46 @@
 import { describe, expect, it, vi } from 'vitest'
 
-import { applyVoiceRecordResponse } from '../app/useInputHandlers.js'
+import { applyVoiceRecordResponse, shouldFallThroughForScroll } from '../app/useInputHandlers.js'
+
+const baseKey = {
+  downArrow: false,
+  pageDown: false,
+  pageUp: false,
+  shift: false,
+  upArrow: false,
+  wheelDown: false,
+  wheelUp: false
+}
+
+describe('shouldFallThroughForScroll — keep transcript scrolling alive during prompt overlays', () => {
+  it('falls through for wheel scrolls', () => {
+    expect(shouldFallThroughForScroll({ ...baseKey, wheelUp: true })).toBe(true)
+    expect(shouldFallThroughForScroll({ ...baseKey, wheelDown: true })).toBe(true)
+  })
+
+  it('falls through for PageUp / PageDown', () => {
+    expect(shouldFallThroughForScroll({ ...baseKey, pageUp: true })).toBe(true)
+    expect(shouldFallThroughForScroll({ ...baseKey, pageDown: true })).toBe(true)
+  })
+
+  it('falls through for Shift+ArrowUp / Shift+ArrowDown', () => {
+    expect(shouldFallThroughForScroll({ ...baseKey, shift: true, upArrow: true })).toBe(true)
+    expect(shouldFallThroughForScroll({ ...baseKey, shift: true, downArrow: true })).toBe(true)
+  })
+
+  it('does NOT fall through for plain arrows — those drive in-prompt selection', () => {
+    expect(shouldFallThroughForScroll({ ...baseKey, upArrow: true })).toBe(false)
+    expect(shouldFallThroughForScroll({ ...baseKey, downArrow: true })).toBe(false)
+  })
+
+  it('does NOT fall through for plain Shift — without an arrow it is a no-op', () => {
+    expect(shouldFallThroughForScroll({ ...baseKey, shift: true })).toBe(false)
+  })
+
+  it('does NOT fall through for unrelated state (no scroll keys held)', () => {
+    expect(shouldFallThroughForScroll(baseKey)).toBe(false)
+  })
+})
 
 describe('applyVoiceRecordResponse', () => {
   it('reverts optimistic REC state when the gateway reports voice busy', () => {
diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts
index ce25af70edd..59de48a310d 100644
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -23,6 +23,42 @@ import { getUiState } from './uiStore.js'
 
 const isCtrl = (key: { ctrl: boolean }, ch: string, target: string) => key.ctrl && ch.toLowerCase() === target
 
+/**
+ * Approval / clarify / confirm overlays mount their own `useInput` handlers
+ * for the in-prompt keys (arrows, numbers, Enter, sometimes Esc).  The global
+ * input handler used to early-return for any other key while one of those
+ * overlays was up, which silently disabled transcript scrolling — the user
+ * couldn't read context above the prompt that the prompt itself was asking
+ * about.  Returns true when the key is a transcript-scroll input that should
+ * fall through to the global scroll handlers even while a prompt is active.
+ *
+ * Modifier-held wheel (precision mode) is included — a user who wants to
+ * scroll a single line at a time during a prompt expects it to work.
+ */
+export function shouldFallThroughForScroll(key: {
+  downArrow: boolean
+  pageDown: boolean
+  pageUp: boolean
+  shift: boolean
+  upArrow: boolean
+  wheelDown: boolean
+  wheelUp: boolean
+}): boolean {
+  if (key.wheelUp || key.wheelDown) {
+    return true
+  }
+
+  if (key.pageUp || key.pageDown) {
+    return true
+  }
+
+  if (key.shift && (key.upArrow || key.downArrow)) {
+    return true
+  }
+
+  return false
+}
+
 export function applyVoiceRecordResponse(
   response: null | VoiceRecordResponse,
   starting: boolean,
@@ -224,7 +260,18 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
       // handlers must receive keystrokes (arrow keys, numbers, Enter).  Only
       // intercept Ctrl+C here so the user can deny/dismiss — all other keys
       // fall through to the component-level handlers.
-      if (overlay.approval || overlay.clarify || overlay.confirm) {
+      //
+      // Scroll inputs (wheel / PageUp / PageDown / Shift+↑↓) are special:
+      // they must reach the transcript scroll handlers below even with a
+      // prompt up.  Long-thread context the prompt is asking about often
+      // lives above the visible viewport, and being unable to read it while
+      // answering felt like the prompt had locked the entire UI.  Explicitly
+      // skip the prompt-overlay early-return for scroll keys so they fall
+      // through to the wheel / PageUp / Shift+arrow handlers below.
+      const promptOverlay = overlay.approval || overlay.clarify || overlay.confirm
+      const fallThroughForScroll = promptOverlay && shouldFallThroughForScroll(key)
+
+      if (promptOverlay && !fallThroughForScroll) {
         if (isCtrl(key, ch, 'c')) {
           cancelOverlayFromCtrlC()
         }
@@ -298,7 +345,13 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
         patchOverlayState({ picker: false })
       }
 
-      return
+      // When a prompt overlay is up and the user pressed a scroll key, fall
+      // through to the global scroll handlers below instead of returning.
+      // Otherwise nothing above this comment matched, and there's nothing
+      // useful to do for an arbitrary key while blocked.
+      if (!fallThroughForScroll) {
+        return
+      }
     }
 
     if (cState.completions.length && cState.input && cState.historyIdx === null && (key.upArrow || key.downArrow)) {
diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx
index e9d42485d9b..3dfd31be869 100644
--- a/ui-tui/src/components/prompts.tsx
+++ b/ui-tui/src/components/prompts.tsx
@@ -11,28 +11,65 @@ const OPTS = ['once', 'session', 'always', 'deny'] as const
 const LABELS = { always: 'Always allow', deny: 'Deny', once: 'Allow once', session: 'Allow this session' } as const
 const CMD_PREVIEW_LINES = 10
 
+type ApprovalKey = {
+  downArrow?: boolean
+  escape?: boolean
+  return?: boolean
+  upArrow?: boolean
+}
+
+type ApprovalAction =
+  | { kind: 'choose'; choice: (typeof OPTS)[number] }
+  | { kind: 'move'; delta: -1 | 1 }
+  | { kind: 'noop' }
+
+/**
+ * Pure key-dispatch for the approval prompt — exported so the regression
+ * matrix (Esc, Ctrl+C-equivalent, number keys, Enter, ↑↓) is testable
+ * without mounting React + Ink + a fake stdin.  The component just maps the
+ * action onto its own state setters.
+ *
+ * Esc and number keys both terminate the prompt; Esc maps to deny (parity
+ * with the global Ctrl+C handler that already calls cancelOverlayFromCtrlC
+ * for approvals).  Numbers 1..OPTS.length pick the labelled choice.  Enter
+ * confirms the current selection.  ↑/↓ moves the selection within bounds.
+ */
+export function approvalAction(ch: string, key: ApprovalKey, sel: number): ApprovalAction {
+  if (key.escape) {
+    return { kind: 'choose', choice: 'deny' }
+  }
+
+  const n = parseInt(ch, 10)
+
+  if (n >= 1 && n <= OPTS.length) {
+    return { kind: 'choose', choice: OPTS[n - 1]! }
+  }
+
+  if (key.return) {
+    return { kind: 'choose', choice: OPTS[sel]! }
+  }
+
+  if (key.upArrow && sel > 0) {
+    return { kind: 'move', delta: -1 }
+  }
+
+  if (key.downArrow && sel < OPTS.length - 1) {
+    return { kind: 'move', delta: 1 }
+  }
+
+  return { kind: 'noop' }
+}
+
 export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
   const [sel, setSel] = useState(0)
 
   useInput((ch, key) => {
-    if (key.upArrow && sel > 0) {
-      setSel(s => s - 1)
-    }
+    const action = approvalAction(ch, key, sel)
 
-    if (key.downArrow && sel < OPTS.length - 1) {
-      setSel(s => s + 1)
-    }
-
-    const n = parseInt(ch, 10)
-
-    if (n >= 1 && n <= OPTS.length) {
-      onChoice(OPTS[n - 1]!)
-
-      return
-    }
-
-    if (key.return) {
-      onChoice(OPTS[sel]!)
+    if (action.kind === 'choose') {
+      onChoice(action.choice)
+    } else if (action.kind === 'move') {
+      setSel(s => s + action.delta)
     }
   })
 
@@ -71,7 +108,7 @@ export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
         </Text>
       ))}
 
-      <Text color={t.color.muted}>↑/↓ select · Enter confirm · 1-4 quick pick · Ctrl+C deny</Text>
+      <Text color={t.color.muted}>↑/↓ select · Enter confirm · 1-4 quick pick · Esc/Ctrl+C deny</Text>
     </Box>
   )
 }

From a31191c3f57e2463ce4253cb1d95f93c52f3df14 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 20:29:20 -0700
Subject: [PATCH 103/218] fix(docs): unique sidebar keys for duplicate skill
 categories (#26726)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-skill sidebar tree from PR #26646 emitted category entries with
only a label. Docusaurus derives translation keys from the label
(sidebar.docs.category.<label>), and categories that exist in both
Bundled and Optional (productivity, mcp, mlops, research, email,
software-development, dogfood) collided on identical keys — failing
i18n extraction and the Deploy Site build. Result: source had the
sidebar fix but no per-skill page rendered with a sidebar in production.

Add a 'key: skills-<source>-<category>' attribute to each generated
category dict so Bundled vs Optional get distinct translation keys.
Regenerated sidebars.ts via the script. Local docusaurus build passes.
---
 website/scripts/generate-skill-docs.py | 15 ++++++++---
 website/sidebars.ts                    | 37 ++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/website/scripts/generate-skill-docs.py b/website/scripts/generate-skill-docs.py
index 2a0694a61c8..c932f01e1bc 100755
--- a/website/scripts/generate-skill-docs.py
+++ b/website/scripts/generate-skill-docs.py
@@ -602,7 +602,7 @@ def build_sidebar_items(entries: list[tuple[dict[str, Any], dict[str, Any]]]) ->
         else:
             optional[meta["category"]].append(meta)
 
-    def cat_section(bucket: dict[str, list[dict[str, Any]]]) -> list[dict]:
+    def cat_section(bucket: dict[str, list[dict[str, Any]]], source: str) -> list[dict]:
         result = []
         for category in sorted(bucket):
             items = sorted(bucket[category], key=lambda m: m["slug"])
@@ -610,6 +610,13 @@ def build_sidebar_items(entries: list[tuple[dict[str, Any], dict[str, Any]]]) ->
                 {
                     "type": "category",
                     "label": category,
+                    # Docusaurus generates a translation key from the label by
+                    # default (e.g. sidebar.docs.category.productivity). When
+                    # the same category name appears under both Bundled and
+                    # Optional, the duplicate keys break i18n extraction and
+                    # fail the build. Scope each category by source to keep
+                    # the keys unique.
+                    "key": f"skills-{source}-{category}",
                     "collapsed": True,
                     "items": [sidebar_doc_id(m) for m in items],
                 }
@@ -617,8 +624,8 @@ def build_sidebar_items(entries: list[tuple[dict[str, Any], dict[str, Any]]]) ->
         return result
 
     return {
-        "bundled_categories": cat_section(bundled),
-        "optional_categories": cat_section(optional),
+        "bundled_categories": cat_section(bundled, "bundled"),
+        "optional_categories": cat_section(optional, "optional"),
     }
 
 
@@ -633,6 +640,8 @@ def _render_sidebar_item(item: Any, indent: int) -> list[str]:
     lines.append(f"{pad}{{")
     lines.append(f"{pad}  type: 'category',")
     lines.append(f"{pad}  label: '{item['label']}',")
+    if item.get("key"):
+        lines.append(f"{pad}  key: '{item['key']}',")
     if item.get("collapsed", True):
         lines.append(f"{pad}  collapsed: true,")
     lines.append(f"{pad}  items: [")
diff --git a/website/sidebars.ts b/website/sidebars.ts
index fe7b741eb2e..52ed452d046 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -121,6 +121,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'apple',
+                  key: 'skills-bundled-apple',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/apple/apple-apple-notes',
@@ -133,6 +134,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'autonomous-ai-agents',
+                  key: 'skills-bundled-autonomous-ai-agents',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code',
@@ -144,6 +146,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'creative',
+                  key: 'skills-bundled-creative',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/creative/creative-architecture-diagram',
@@ -170,6 +173,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'data-science',
+                  key: 'skills-bundled-data-science',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel',
@@ -178,6 +182,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'devops',
+                  key: 'skills-bundled-devops',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/devops/devops-kanban-orchestrator',
@@ -188,6 +193,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'dogfood',
+                  key: 'skills-bundled-dogfood',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/dogfood/dogfood-dogfood',
@@ -196,6 +202,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'email',
+                  key: 'skills-bundled-email',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/email/email-himalaya',
@@ -204,6 +211,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'gaming',
+                  key: 'skills-bundled-gaming',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server',
@@ -213,6 +221,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'github',
+                  key: 'skills-bundled-github',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/github/github-codebase-inspection',
@@ -226,6 +235,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'mcp',
+                  key: 'skills-bundled-mcp',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/mcp/mcp-native-mcp',
@@ -234,6 +244,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'media',
+                  key: 'skills-bundled-media',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/media/media-gif-search',
@@ -246,6 +257,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'mlops',
+                  key: 'skills-bundled-mlops',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/mlops/mlops-models-audiocraft',
@@ -262,6 +274,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'note-taking',
+                  key: 'skills-bundled-note-taking',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/note-taking/note-taking-obsidian',
@@ -270,6 +283,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'productivity',
+                  key: 'skills-bundled-productivity',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/productivity/productivity-airtable',
@@ -286,6 +300,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'red-teaming',
+                  key: 'skills-bundled-red-teaming',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/red-teaming/red-teaming-godmode',
@@ -294,6 +309,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'research',
+                  key: 'skills-bundled-research',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/research/research-arxiv',
@@ -306,6 +322,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'smart-home',
+                  key: 'skills-bundled-smart-home',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/smart-home/smart-home-openhue',
@@ -314,6 +331,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'social-media',
+                  key: 'skills-bundled-social-media',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/social-media/social-media-xurl',
@@ -322,6 +340,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'software-development',
+                  key: 'skills-bundled-software-development',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands',
@@ -340,6 +359,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'yuanbao',
+                  key: 'skills-bundled-yuanbao',
                   collapsed: true,
                   items: [
                     'user-guide/skills/bundled/yuanbao/yuanbao-yuanbao',
@@ -355,6 +375,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'autonomous-ai-agents',
+                  key: 'skills-optional-autonomous-ai-agents',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox',
@@ -364,6 +385,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'blockchain',
+                  key: 'skills-optional-blockchain',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/blockchain/blockchain-evm',
@@ -374,6 +396,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'communication',
+                  key: 'skills-optional-communication',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/communication/communication-one-three-one-rule',
@@ -382,6 +405,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'creative',
+                  key: 'skills-optional-creative',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/creative/creative-blender-mcp',
@@ -394,6 +418,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'devops',
+                  key: 'skills-optional-devops',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/devops/devops-cli',
@@ -404,6 +429,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'dogfood',
+                  key: 'skills-optional-dogfood',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test',
@@ -412,6 +438,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'email',
+                  key: 'skills-optional-email',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/email/email-agentmail',
@@ -420,6 +447,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'finance',
+                  key: 'skills-optional-finance',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/finance/finance-3-statement-model',
@@ -435,6 +463,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'health',
+                  key: 'skills-optional-health',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/health/health-fitness-nutrition',
@@ -444,6 +473,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'mcp',
+                  key: 'skills-optional-mcp',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/mcp/mcp-fastmcp',
@@ -453,6 +483,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'migration',
+                  key: 'skills-optional-migration',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/migration/migration-openclaw-migration',
@@ -461,6 +492,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'mlops',
+                  key: 'skills-optional-mlops',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/mlops/mlops-accelerate',
@@ -496,6 +528,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'productivity',
+                  key: 'skills-optional-productivity',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/productivity/productivity-canvas',
@@ -510,6 +543,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'research',
+                  key: 'skills-optional-research',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/research/research-bioinformatics',
@@ -526,6 +560,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'security',
+                  key: 'skills-optional-security',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/security/security-1password',
@@ -536,6 +571,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'software-development',
+                  key: 'skills-optional-software-development',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/software-development/software-development-rest-graphql-debug',
@@ -544,6 +580,7 @@ const sidebars: SidebarsConfig = {
                 {
                   type: 'category',
                   label: 'web-development',
+                  key: 'skills-optional-web-development',
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/web-development/web-development-page-agent',

From c5dc9700ebc8b890e349c0cc3e978d133395909b Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 20:29:28 -0700
Subject: [PATCH 104/218] fix(windows): silence tirith-unavailable banner +
 skip install/spawn attempts on unsupported platforms (#26718)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tirith ships no Windows binary, so on every Windows CLI startup users
saw a scary 'tirith security scanner enabled but not available' banner
they could not act on. The banner suggested degraded security; in
reality pattern-matching guards still run and the message was pure noise.

Fix:
- New public is_platform_supported() helper in tools/tirith_security.py
  that returns False when _detect_target() doesn't resolve (Windows, any
  non-x86_64/aarch64 arch).
- ensure_installed(), _resolve_tirith_path(), and check_command_security()
  short-circuit on unsupported platforms: cache _resolved_path =
  _INSTALL_FAILED with reason 'unsupported_platform', skip PATH probes,
  skip the background download thread, skip the disk failure marker, and
  return allow with an empty summary from check_command_security so the
  spawn loop never fires.
- Explicit user-configured tirith_path is still honored everywhere (a
  user who built tirith themselves under WSL keeps that path).
- CLI banner in cli.py gated on is_platform_supported() — fires only on
  platforms where tirith *should* work but isn't installed.
- Docs note tirith's supported-platform list and point Windows users at
  WSL.

Tests: tests/tools/test_tirith_security.py +8 tests covering Linux
x86_64, Darwin arm64, Windows, and unknown-arch verdicts plus the
silent ensure_installed / check_command_security / _resolve_tirith_path
fast-paths and the explicit-path override.

  test_tirith_security.py     75 passed (8 new + 67 pre-existing)
  test_command_guards.py      19 passed
---
 cli.py                              |  8 ++-
 tests/tools/test_tirith_security.py | 97 +++++++++++++++++++++++++++++
 tools/tirith_security.py            | 40 +++++++++++-
 website/docs/user-guide/security.md |  2 +
 4 files changed, 143 insertions(+), 4 deletions(-)

diff --git a/cli.py b/cli.py
index 50e7a8c8ce9..241d41e9fcd 100644
--- a/cli.py
+++ b/cli.py
@@ -11736,11 +11736,13 @@ class HermesCLI:
 
         # Ensure tirith security scanner is available (downloads if needed).
         # Warn the user if tirith is enabled in config but not available,
-        # so they know command security scanning is degraded.
+        # so they know command security scanning is degraded.  Suppressed
+        # on platforms where tirith ships no binary (Windows etc.) — the
+        # user can't act on it and pattern-matching guards still run.
         try:
-            from tools.tirith_security import ensure_installed
+            from tools.tirith_security import ensure_installed, is_platform_supported
             tirith_path = ensure_installed(log_failures=False)
-            if tirith_path is None:
+            if tirith_path is None and is_platform_supported():
                 security_cfg = self.config.get("security", {}) or {}
                 tirith_enabled = security_cfg.get("tirith_enabled", True)
                 if tirith_enabled:
diff --git a/tests/tools/test_tirith_security.py b/tests/tools/test_tirith_security.py
index ecaf4f4e639..afeb14f9458 100644
--- a/tests/tools/test_tirith_security.py
+++ b/tests/tools/test_tirith_security.py
@@ -333,6 +333,103 @@ class TestEnsureInstalled:
         _tirith_mod._resolved_path = None
 
 
+# ---------------------------------------------------------------------------
+# Unsupported platform (Windows etc.) — silent fast-path everywhere
+# ---------------------------------------------------------------------------
+
+class TestUnsupportedPlatform:
+    """When _detect_target() returns None (no tirith binary for this OS+arch),
+    the entire subsystem must stay silent: no PATH probes, no download thread,
+    no disk failure marker, no spawn attempts, no CLI banner. Pattern-matching
+    guards still cover the gap; tirith content scanning is just absent."""
+
+    def test_is_platform_supported_true_on_linux_x86_64(self):
+        with patch("tools.tirith_security.platform.system", return_value="Linux"), \
+             patch("tools.tirith_security.platform.machine", return_value="x86_64"):
+            assert _tirith_mod.is_platform_supported() is True
+
+    def test_is_platform_supported_true_on_darwin_arm64(self):
+        with patch("tools.tirith_security.platform.system", return_value="Darwin"), \
+             patch("tools.tirith_security.platform.machine", return_value="arm64"):
+            assert _tirith_mod.is_platform_supported() is True
+
+    def test_is_platform_supported_false_on_windows(self):
+        with patch("tools.tirith_security.platform.system", return_value="Windows"), \
+             patch("tools.tirith_security.platform.machine", return_value="AMD64"):
+            assert _tirith_mod.is_platform_supported() is False
+
+    def test_is_platform_supported_false_on_unknown_arch(self):
+        with patch("tools.tirith_security.platform.system", return_value="Linux"), \
+             patch("tools.tirith_security.platform.machine", return_value="riscv64"):
+            assert _tirith_mod.is_platform_supported() is False
+
+    @patch("tools.tirith_security._load_security_config")
+    def test_ensure_installed_unsupported_returns_none_no_thread(self, mock_cfg):
+        """Windows: don't start a background install thread, don't write a
+        failure marker — just cache the verdict and return None."""
+        mock_cfg.return_value = {"tirith_enabled": True, "tirith_path": "tirith",
+                                 "tirith_timeout": 5, "tirith_fail_open": True}
+        _tirith_mod._resolved_path = None
+        with patch("tools.tirith_security.is_platform_supported", return_value=False), \
+             patch("tools.tirith_security.threading.Thread") as MockThread, \
+             patch("tools.tirith_security._mark_install_failed") as mock_mark, \
+             patch("tools.tirith_security.shutil.which") as mock_which:
+            result = ensure_installed()
+            assert result is None
+            MockThread.assert_not_called()
+            mock_mark.assert_not_called()
+            mock_which.assert_not_called()
+            assert _tirith_mod._resolved_path is _tirith_mod._INSTALL_FAILED
+            assert _tirith_mod._install_failure_reason == "unsupported_platform"
+
+    @patch("tools.tirith_security._load_security_config")
+    def test_check_command_security_unsupported_allows_silently(self, mock_cfg):
+        """Windows: skip the resolver and spawn entirely — return allow with
+        an empty summary so callers can't accidentally surface 'tirith
+        unavailable' messaging to the user."""
+        mock_cfg.return_value = {"tirith_enabled": True, "tirith_path": "tirith",
+                                 "tirith_timeout": 5, "tirith_fail_open": True}
+        with patch("tools.tirith_security.is_platform_supported", return_value=False), \
+             patch("tools.tirith_security.subprocess.run") as mock_run, \
+             patch("tools.tirith_security._resolve_tirith_path") as mock_resolve:
+            result = check_command_security("rm -rf /")
+            assert result == {"action": "allow", "findings": [], "summary": ""}
+            mock_run.assert_not_called()
+            mock_resolve.assert_not_called()
+
+    @patch("tools.tirith_security._load_security_config")
+    def test_resolve_path_unsupported_caches_failure_without_probing(self, mock_cfg):
+        """The per-command resolver must also short-circuit on Windows so
+        long-running gateways don't churn through `shutil.which` and disk
+        I/O for every scanned command."""
+        mock_cfg.return_value = {"tirith_enabled": True, "tirith_path": "tirith",
+                                 "tirith_timeout": 5, "tirith_fail_open": True}
+        _tirith_mod._resolved_path = None
+        with patch("tools.tirith_security.is_platform_supported", return_value=False), \
+             patch("tools.tirith_security.shutil.which") as mock_which:
+            result = _tirith_mod._resolve_tirith_path("tirith")
+            assert result == "tirith"
+            mock_which.assert_not_called()
+            assert _tirith_mod._resolved_path is _tirith_mod._INSTALL_FAILED
+            assert _tirith_mod._install_failure_reason == "unsupported_platform"
+
+    @patch("tools.tirith_security._load_security_config")
+    def test_explicit_path_still_honored_on_unsupported_platform(self, mock_cfg):
+        """If a user explicitly configured a tirith_path (e.g. they built it
+        themselves under WSL), the unsupported-platform short-circuit must
+        NOT override that — explicit config wins."""
+        mock_cfg.return_value = {"tirith_enabled": True,
+                                 "tirith_path": "/opt/custom/tirith",
+                                 "tirith_timeout": 5, "tirith_fail_open": True}
+        _tirith_mod._resolved_path = None
+        with patch("tools.tirith_security.is_platform_supported", return_value=False), \
+             patch("os.path.isfile", return_value=True), \
+             patch("os.access", return_value=True):
+            result = _tirith_mod._resolve_tirith_path("/opt/custom/tirith")
+            assert result == "/opt/custom/tirith"
+            assert _tirith_mod._resolved_path == "/opt/custom/tirith"
+
+
 # ---------------------------------------------------------------------------
 # Failed download caches the miss (Finding #1)
 # ---------------------------------------------------------------------------
diff --git a/tools/tirith_security.py b/tools/tirith_security.py
index 1c79892f424..b45d7d29213 100644
--- a/tools/tirith_security.py
+++ b/tools/tirith_security.py
@@ -214,7 +214,12 @@ def _hermes_bin_dir() -> str:
 
 
 def _detect_target() -> str | None:
-    """Return the Rust target triple for the current platform, or None."""
+    """Return the Rust target triple for the current platform, or None.
+
+    Windows is intentionally unsupported — tirith does not ship a Windows
+    build. Callers should treat `None` as "this platform will never have
+    tirith" and silently fall back to pattern-matching guards.
+    """
     system = platform.system()
     machine = platform.machine().lower()
 
@@ -236,6 +241,16 @@ def _detect_target() -> str | None:
     return f"{arch}-{plat}"
 
 
+def is_platform_supported() -> bool:
+    """True when tirith ships a prebuilt binary for this OS+arch.
+
+    Used by callers (CLI banner, etc.) to distinguish "tirith failed to
+    install" from "tirith was never going to install here" — the latter
+    is silent because there is nothing the user can do about it.
+    """
+    return _detect_target() is not None
+
+
 def _download_file(url: str, dest: str, timeout: int = 10):
     """Download a URL to a local file."""
     req = urllib.request.Request(url)
@@ -448,6 +463,15 @@ def _resolve_tirith_path(configured_path: str) -> str:
     explicit = _is_explicit_path(configured_path)
     install_failed = _resolved_path is _INSTALL_FAILED
 
+    # Platform has no tirith build (Windows etc.). Cache the verdict and
+    # return the unexpanded configured path — the spawn loop will fail-open
+    # via the dedupe'd OSError handler, but only after the first call; on
+    # subsequent calls the fast-path above short-circuits before spawning.
+    if not explicit and not is_platform_supported():
+        _resolved_path = _INSTALL_FAILED
+        _install_failure_reason = "unsupported_platform"
+        return expanded
+
     # Explicit path: check it and stop. Never auto-download a replacement.
     if explicit:
         if os.path.isfile(expanded) and os.access(expanded, os.X_OK):
@@ -574,6 +598,14 @@ def ensure_installed(*, log_failures: bool = True):
             return path
         return None
 
+    # Platform has no tirith build (e.g. Windows) — don't probe PATH,
+    # don't start a download thread, don't write a disk failure marker.
+    # Pattern-matching guards still run; this path stays silent.
+    if not is_platform_supported():
+        _resolved_path = _INSTALL_FAILED
+        _install_failure_reason = "unsupported_platform"
+        return None
+
     configured_path = cfg["tirith_path"]
     explicit = _is_explicit_path(configured_path)
     expanded = os.path.expanduser(configured_path)
@@ -659,6 +691,12 @@ def check_command_security(command: str) -> dict:
     if not cfg["tirith_enabled"]:
         return {"action": "allow", "findings": [], "summary": ""}
 
+    # Unsupported platform (Windows etc.) — tirith has no binary here and
+    # never will. Skip the resolver entirely so we don't even try to spawn.
+    # Pattern-matching guards still run via the rest of approval.py.
+    if not is_platform_supported():
+        return {"action": "allow", "findings": [], "summary": ""}
+
     tirith_path = _resolve_tirith_path(cfg["tirith_path"])
     timeout = cfg["tirith_timeout"]
     fail_open = cfg["tirith_fail_open"]
diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md
index fca8a99a248..2a48deb2448 100644
--- a/website/docs/user-guide/security.md
+++ b/website/docs/user-guide/security.md
@@ -537,6 +537,8 @@ security:
 
 When `tirith_fail_open` is `true` (default), commands proceed if tirith is not installed or times out. Set to `false` in high-security environments to block commands when tirith is unavailable.
 
+Tirith ships prebuilt binaries for Linux (x86_64 / aarch64) and macOS (x86_64 / arm64). On platforms with no prebuilt binary (Windows, etc.), tirith is silently skipped — pattern-matching guards still run, and the CLI does not surface an "unavailable" banner. To use tirith on Windows, run Hermes under WSL.
+
 Tirith's verdict integrates with the approval flow: safe commands pass through, while both suspicious and blocked commands trigger user approval with the full tirith findings (severity, title, description, safer alternatives). Users can approve or deny — the default choice is deny to keep unattended scenarios secure.
 
 ### Context File Injection Protection

From ca413c6164e7957d33841353feb9cdbf838dead7 Mon Sep 17 00:00:00 2001
From: Austin Pickett <pickett.austin@gmail.com>
Date: Fri, 15 May 2026 23:39:29 -0400
Subject: [PATCH 105/218] fix(dashboard): align Ukrainian Kanban Ready column
 help

Mirrors the dependency-ready / assign-profile semantics used in other locales;
Copilot review noted uk.ts was still on the old dispatcher-tick wording.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 web/src/i18n/uk.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/src/i18n/uk.ts b/web/src/i18n/uk.ts
index 72726aabe5f..3c3df8dae68 100644
--- a/web/src/i18n/uk.ts
+++ b/web/src/i18n/uk.ts
@@ -663,7 +663,7 @@ export const uk: Translations = {
     columnHelp: {
       triage: "Сирі ідеї — специфікатор деталізує специфікацію",
       todo: "Очікує на залежності або не призначено",
-      ready: "Призначено, очікує тіку диспетчера",
+      ready: "Залежності задоволені; призначте профіль для диспетчеризації",
       running: "Захоплено воркером — у роботі",
       blocked: "Воркер запитав втручання людини",
       done: "Завершено",

From 965610f922be5b2afb6fa412205077486734a433 Mon Sep 17 00:00:00 2001
From: Austin Pickett <pickett.austin@gmail.com>
Date: Fri, 15 May 2026 23:57:30 -0400
Subject: [PATCH 106/218] fix(cli): tolerate unreadable dirs when building
 systemd PATH
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

generate_systemd_unit runs _build_service_path_dirs(); tests that mimic sudo
(Path.home → /root) caused is_dir() to raise PermissionError for unprivileged
users on /root/.hermes/..., failing CI. Treat inaccessible paths like missing.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 hermes_cli/gateway.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index a865bcaf8be..f2d6223f3de 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -2103,6 +2103,19 @@ def _hermes_home_for_target_user(target_home_dir: str) -> str:
         return str(current_hermes)
 
 
+def _path_usable_bindir(path: Path) -> bool:
+    """True iff ``path`` exists as a dir and we could stat/read it.
+
+    systemd unit generation may run under simulated ``sudo`` in tests via
+    ``Path.home()`` → ``/root``; unprivileged users get ``PermissionError`` on
+    ``/root/.hermes/…`` probes. Missing/unreadable dirs are treated as absent.
+    """
+    try:
+        return path.is_dir()
+    except OSError:
+        return False
+
+
 def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
     """Build PATH directory list for service units, excluding non-existent dirs."""
     if project_root is None:
@@ -2111,21 +2124,21 @@ def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
     candidates = []
 
     venv_bin = project_root / "venv" / "bin"
-    if venv_bin.is_dir():
+    if _path_usable_bindir(venv_bin):
         candidates.append(str(venv_bin))
     elif sys.prefix != sys.base_prefix:
         candidates.append(str(Path(sys.prefix) / "bin"))
 
     node_bin = project_root / "node_modules" / ".bin"
-    if node_bin.is_dir():
+    if _path_usable_bindir(node_bin):
         candidates.append(str(node_bin))
 
     hermes_home = get_hermes_home()
     hermes_node = hermes_home / "node" / "bin"
-    if hermes_node.is_dir():
+    if _path_usable_bindir(hermes_node):
         candidates.append(str(hermes_node))
     hermes_nm = hermes_home / "node_modules" / ".bin"
-    if hermes_nm.is_dir():
+    if _path_usable_bindir(hermes_nm):
         candidates.append(str(hermes_nm))
 
     return candidates

From 16ff9464a5daae9b82bf2ce2c7de5ba8f80cfd40 Mon Sep 17 00:00:00 2001
From: Austin Pickett <pickett.austin@gmail.com>
Date: Sat, 16 May 2026 00:04:58 -0400
Subject: [PATCH 107/218] Revert "fix(cli): tolerate unreadable dirs when
 building systemd PATH"

This reverts commit 965610f922be5b2afb6fa412205077486734a433.
---
 hermes_cli/gateway.py | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index f2d6223f3de..a865bcaf8be 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -2103,19 +2103,6 @@ def _hermes_home_for_target_user(target_home_dir: str) -> str:
         return str(current_hermes)
 
 
-def _path_usable_bindir(path: Path) -> bool:
-    """True iff ``path`` exists as a dir and we could stat/read it.
-
-    systemd unit generation may run under simulated ``sudo`` in tests via
-    ``Path.home()`` → ``/root``; unprivileged users get ``PermissionError`` on
-    ``/root/.hermes/…`` probes. Missing/unreadable dirs are treated as absent.
-    """
-    try:
-        return path.is_dir()
-    except OSError:
-        return False
-
-
 def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
     """Build PATH directory list for service units, excluding non-existent dirs."""
     if project_root is None:
@@ -2124,21 +2111,21 @@ def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
     candidates = []
 
     venv_bin = project_root / "venv" / "bin"
-    if _path_usable_bindir(venv_bin):
+    if venv_bin.is_dir():
         candidates.append(str(venv_bin))
     elif sys.prefix != sys.base_prefix:
         candidates.append(str(Path(sys.prefix) / "bin"))
 
     node_bin = project_root / "node_modules" / ".bin"
-    if _path_usable_bindir(node_bin):
+    if node_bin.is_dir():
         candidates.append(str(node_bin))
 
     hermes_home = get_hermes_home()
     hermes_node = hermes_home / "node" / "bin"
-    if _path_usable_bindir(hermes_node):
+    if hermes_node.is_dir():
         candidates.append(str(hermes_node))
     hermes_nm = hermes_home / "node_modules" / ".bin"
-    if _path_usable_bindir(hermes_nm):
+    if hermes_nm.is_dir():
         candidates.append(str(hermes_nm))
 
     return candidates

From c9b32a654cd1f3480920431bd4e32a035a61a29d Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 21:54:56 -0700
Subject: [PATCH 108/218] feat(skill): darwinian-evolver optional skill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thin wrapper around Imbue's darwinian_evolver (AGPL-3.0, subprocess-only).
Ships a working OpenRouter driver (parrot_openrouter.py), a snapshot
inspector (show_snapshot.py), and a custom-problem template. SKILL.md
has 58-char description, Pitfalls sourced from actually running the loop:
non-viable seed trap, Azure content filter killing runs, loop.run() being
a generator, nested-pickle snapshots, and aggressive default concurrency.

Salvaged from #12719 by @Bihruze — original PR shipped 12,289 LOC across
61 files (29 Python modules, FastAPI dashboard, VS Code extension,
benchmark hub, marketplace, etc.) which was far beyond the scope of the
underlying issue (#336). This version stays at the ~700-LOC scope that
issue actually asked for. Authorship of the original effort credited via
AUTHOR_MAP entry and the SKILL.md author field.

Verified end-to-end: seed 'Say {{ phrase }}' (score 0.000) evolved into
'Please repeat the following phrase exactly as it is, without any
modifications or additional formatting: {{ phrase }}' (score 0.750)
across 3 iterations on gpt-4o-mini via OpenRouter.

Co-authored-by: Bihruze <98262967+Bihruze@users.noreply.github.com>
---
 .../research/darwinian-evolver/SKILL.md       | 199 +++++++++++++++
 .../scripts/parrot_openrouter.py              | 218 ++++++++++++++++
 .../scripts/show_snapshot.py                  |  69 +++++
 .../templates/custom_problem_template.py      | 240 ++++++++++++++++++
 tests/skills/test_darwinian_evolver_skill.py  | 102 ++++++++
 5 files changed, 828 insertions(+)
 create mode 100644 optional-skills/research/darwinian-evolver/SKILL.md
 create mode 100644 optional-skills/research/darwinian-evolver/scripts/parrot_openrouter.py
 create mode 100644 optional-skills/research/darwinian-evolver/scripts/show_snapshot.py
 create mode 100644 optional-skills/research/darwinian-evolver/templates/custom_problem_template.py
 create mode 100644 tests/skills/test_darwinian_evolver_skill.py

diff --git a/optional-skills/research/darwinian-evolver/SKILL.md b/optional-skills/research/darwinian-evolver/SKILL.md
new file mode 100644
index 00000000000..272f6702481
--- /dev/null
+++ b/optional-skills/research/darwinian-evolver/SKILL.md
@@ -0,0 +1,199 @@
+---
+name: darwinian-evolver
+description: Evolve prompts/regex/SQL/code with Imbue's evolution loop.
+version: 0.1.0
+author: Bihruze (Asahi0x), Hermes Agent
+license: MIT
+platforms: [linux, macos]
+metadata:
+  hermes:
+    tags: [evolution, optimization, prompt-engineering, research]
+    related_skills: [arxiv, jupyter-live-kernel]
+---
+
+# Darwinian Evolver
+
+Run Imbue's [darwinian_evolver](https://github.com/imbue-ai/darwinian_evolver) — an
+LLM-driven evolutionary search loop — to optimize a **prompt, regex, SQL query,
+or small code snippet** against a fitness function.
+
+Status: thin wrapper around the upstream tool. The skill installs it, walks the
+agent through writing a `Problem` definition (organism + evaluator + mutator),
+and drives the loop via the upstream CLI or a small custom Python driver.
+
+**License:** the upstream tool is **AGPL-3.0**. The skill ONLY ever invokes it
+via the upstream CLI or a `subprocess`/`uv run` call (mere aggregation). Do NOT
+import upstream classes into Hermes itself.
+
+## When to Use
+
+- User says "optimize this prompt", "evolve a regex for X", "auto-improve this
+  code/SQL", "search for a better instruction".
+- You have a scorer (exact match, regex pass-rate, unit test, LLM-judge, runtime
+  metric) AND a starting candidate (organism). If you don't have a scorer, stop
+  and define one first — that's the hard part.
+- Cost is OK: a typical run is 50–500 LLM calls. On gpt-4o-mini that's pennies;
+  on Claude Sonnet it can be a few dollars.
+
+Do **not** use this when:
+- The optimization target is differentiable (use gradient descent / DSPy).
+- You only need to try 2–3 variants — just write them by hand.
+- The fitness signal is purely subjective with no measurable criterion.
+
+## Prerequisites
+
+- Python ≥3.11
+- `git`, `uv` (or `pip`)
+- One of: `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, or `OPENAI_API_KEY`
+
+The skill ships a small `parrot_openrouter.py` driver that uses `OPENROUTER_API_KEY`
+via the OpenAI SDK, so any model on OpenRouter works. The upstream CLI itself
+hardcodes Anthropic and needs `ANTHROPIC_API_KEY`.
+
+## Install (One-Time)
+
+Run via the `terminal` tool:
+
+```bash
+mkdir -p ~/.hermes/cache/darwinian-evolver && cd ~/.hermes/cache/darwinian-evolver
+[ -d darwinian_evolver ] || git clone --depth 1 https://github.com/imbue-ai/darwinian_evolver.git
+cd darwinian_evolver && uv sync
+```
+
+Verify:
+
+```bash
+cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver \
+  && uv run darwinian_evolver --help | head -5
+```
+
+## Quick Start — The Built-In Parrot Example
+
+Tiny smoke test (requires `ANTHROPIC_API_KEY`):
+
+```bash
+cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver
+uv run darwinian_evolver parrot \
+  --num_iterations 2 \
+  --num_parents_per_iteration 2 \
+  --mutator_concurrency 2 --evaluator_concurrency 2 \
+  --output_dir /tmp/parrot_demo
+```
+
+Outputs:
+- `/tmp/parrot_demo/snapshots/iteration_N.pkl` — pickled population per iteration
+- `/tmp/parrot_demo/<jsonl>` — per-iteration JSON log (path printed at end)
+
+Open `~/.hermes/cache/darwinian-evolver/darwinian_evolver/darwinian_evolver/lineage_visualizer.html`
+in a browser and load the JSON log to see the evolutionary tree.
+
+## Quick Start — OpenRouter Driver (No Anthropic Key)
+
+The skill ships `scripts/parrot_openrouter.py` — same parrot problem, but the
+LLM call goes through OpenRouter so any provider works.
+
+```bash
+# From wherever the skill is installed:
+SKILL_DIR=~/.hermes/skills/research/darwinian-evolver
+DE_DIR=~/.hermes/cache/darwinian-evolver/darwinian_evolver
+
+cd "$DE_DIR" && \
+  EVOLVER_MODEL='openai/gpt-4o-mini' \
+  uv run --with openai python "$SKILL_DIR/scripts/parrot_openrouter.py" \
+    --num_iterations 3 --num_parents_per_iteration 2 \
+    --output_dir /tmp/parrot_or
+```
+
+Inspect the result with `scripts/show_snapshot.py`:
+
+```bash
+uv run --with openai python "$SKILL_DIR/scripts/show_snapshot.py" \
+  /tmp/parrot_or/snapshots/iteration_3.pkl
+```
+
+Expected output: 7 evolved prompt templates ranked by score, with the best
+landing around 0.6–0.8 (the seed `Say {{ phrase }}` scored 0.000).
+
+## Defining a Custom Problem
+
+The skill ships `templates/custom_problem_template.py` — copy, edit, run.
+Three things you must define:
+
+1. **`Organism`** — a Pydantic `BaseModel` subclass holding the artifact being
+   evolved (`prompt_template: str`, `regex_pattern: str`, `sql_query: str`,
+   `code_block: str`, etc.). Add a `run(*args)` method that exercises it.
+
+2. **`Evaluator`** — `.evaluate(organism) -> EvaluationResult(score=..., trainable_failure_cases=[...], holdout_failure_cases=[...], is_viable=True)`.
+   - **`score`** is in `[0, 1]`. Higher is better.
+   - **`trainable_failure_cases`** — what the mutator sees. Include enough
+     context (input, expected, actual) for the LLM to diagnose.
+   - **`holdout_failure_cases`** — kept out of the mutator's view. Use these
+     to detect overfitting.
+   - **`is_viable=True`** unless the organism is completely broken (raises,
+     returns None, etc.). A 0-score viable organism is fine — it just gets
+     down-weighted in parent selection.
+
+3. **`Mutator`** — `.mutate(organism, failure_cases, learning_log_entries) -> list[Organism]`.
+   Typically: build an LLM prompt that includes the current organism + a
+   failure case + an ask to propose a fix; parse the LLM's response; return
+   a new `Organism`. Return `[]` on parse failure — the loop handles it.
+
+Then write a driver script that wires `Problem(initial_organism, evaluator, [mutators])`
+into `EvolveProblemLoop` and iterates over `loop.run(num_iterations=N)` — the
+shipped `scripts/parrot_openrouter.py` is the reference.
+
+## Hyperparameters That Actually Matter
+
+| flag | default | when to change |
+|---|---|---|
+| `--num_iterations` | 5 | bump to 10–20 once you trust the evaluator |
+| `--num_parents_per_iteration` | 4 | drop to 2 for cheap exploration |
+| `--mutator_concurrency` | 10 | drop to 2–4 to avoid rate limits |
+| `--evaluator_concurrency` | 10 | same; evaluator hits the LLM too |
+| `--batch_size` | 1 | raise to 3–5 once your mutator handles multiple failures |
+| `--verify_mutations` | off | turn on once mutator is wasteful (>10× cost saving on later runs per Imbue) |
+| `--midpoint_score` | `p75` | leave alone unless scores cluster |
+| `--sharpness` | 10 | leave alone |
+
+## Pitfalls
+
+1. **`Initial organism must be viable`** — set `is_viable=True` in your
+   `EvaluationResult` even on a 0-score seed. The loop refuses non-viable
+   organisms because they imply the loop has nothing to evolve from.
+2. **Provider content filters kill runs.** Azure-backed OpenRouter models
+   reject phrases like "ignore previous instructions" with HTTP 400. Wrap
+   the LLM call in `try/except` and return `f"<LLM_ERROR: {e}>"` — the
+   evolver will just score that organism 0 and move on.
+3. **`loop.run()` is a generator** — calling it doesn't run anything until
+   you iterate. Use `for snap in loop.run(num_iterations=N):`.
+4. **Snapshots are nested pickles.** `iteration_N.pkl` contains a dict with
+   `population_snapshot` (more pickled bytes). To unpickle you must have the
+   `Organism` class importable under the same dotted path it was pickled at.
+5. **Concurrency defaults are aggressive.** 10/10 will hit rate limits on
+   most providers. Start with 2/2.
+6. **CLI is hardcoded to Anthropic.** `uv run darwinian_evolver <problem>`
+   reaches for `ANTHROPIC_API_KEY` and uses Claude Sonnet. To use any other
+   provider, write a driver like `parrot_openrouter.py`.
+7. **AGPL.** Never `from darwinian_evolver import ...` inside Hermes core.
+   Custom driver scripts under `~/.hermes/skills/...` are user-side and fine.
+8. **No PyPI package.** `pip install darwinian-evolver` will pull the wrong
+   thing. Always install from the GitHub repo.
+
+## Verification
+
+After install + a parrot run, exit code 0 from this is sufficient:
+
+```bash
+DE_DIR=~/.hermes/cache/darwinian-evolver/darwinian_evolver
+ls "$DE_DIR/darwinian_evolver/lineage_visualizer.html" >/dev/null && \
+cd "$DE_DIR" && uv run darwinian_evolver --help >/dev/null && \
+echo "darwinian-evolver: OK"
+```
+
+## References
+
+- [Imbue research post](https://imbue.com/research/2026-02-27-darwinian-evolver/)
+- [ARC-AGI-2 results](https://imbue.com/research/2026-02-27-arc-agi-2-evolution/)
+- [imbue-ai/darwinian_evolver](https://github.com/imbue-ai/darwinian_evolver) (AGPL-3.0)
+- [Darwin Gödel Machines](https://arxiv.org/abs/2505.22954)
+- [PromptBreeder](https://arxiv.org/abs/2309.16797)
diff --git a/optional-skills/research/darwinian-evolver/scripts/parrot_openrouter.py b/optional-skills/research/darwinian-evolver/scripts/parrot_openrouter.py
new file mode 100644
index 00000000000..545f8f1feb3
--- /dev/null
+++ b/optional-skills/research/darwinian-evolver/scripts/parrot_openrouter.py
@@ -0,0 +1,218 @@
+"""
+parrot_openrouter: same as the upstream `parrot` example but the LLM call goes
+through OpenRouter (OpenAI SDK) instead of Anthropic native. Lets us run an
+end-to-end evolution with whatever model the user already has paid access to.
+
+Run with:
+    uv --project darwinian_evolver run python parrot_openrouter.py \
+        --num_iterations 3 --output_dir /tmp/parrot_out
+
+Reads `OPENROUTER_API_KEY` from the environment.
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+import jinja2
+from openai import OpenAI
+
+# Vendored problem types from upstream (AGPL — only run via subprocess in production)
+from darwinian_evolver.cli_common import build_hyperparameter_config_from_args
+from darwinian_evolver.cli_common import register_hyperparameter_args
+from darwinian_evolver.cli_common import parse_learning_log_view_type
+from darwinian_evolver.evolve_problem_loop import EvolveProblemLoop
+from darwinian_evolver.learning_log import LearningLogEntry
+from darwinian_evolver.problem import EvaluationFailureCase
+from darwinian_evolver.problem import EvaluationResult
+from darwinian_evolver.problem import Evaluator
+from darwinian_evolver.problem import Mutator
+from darwinian_evolver.problem import Organism
+from darwinian_evolver.problem import Problem
+
+DEFAULT_MODEL = os.environ.get("EVOLVER_MODEL", "openai/gpt-4o-mini")
+
+
+def _client() -> OpenAI:
+    key = os.environ.get("OPENROUTER_API_KEY")
+    if not key:
+        sys.exit("OPENROUTER_API_KEY is not set")
+    return OpenAI(api_key=key, base_url="https://openrouter.ai/api/v1")
+
+
+def _prompt_llm(prompt: str) -> str:
+    try:
+        r = _client().chat.completions.create(
+            model=DEFAULT_MODEL,
+            max_tokens=1024,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        return r.choices[0].message.content or ""
+    except Exception as e:
+        # Treat any provider error (rate limit, content filter, schema reject)
+        # as a failed response. The evolver will simply see this as a low score
+        # on this organism and move on — much friendlier than killing the run.
+        return f"<LLM_ERROR: {type(e).__name__}: {e}>"
+
+
+class ParrotOrganism(Organism):
+    prompt_template: str
+
+    def run(self, phrase: str) -> str:
+        try:
+            prompt = jinja2.Template(self.prompt_template).render(phrase=phrase)
+        except jinja2.exceptions.TemplateError as e:
+            return f"Error rendering prompt: {e}"
+        if not prompt:
+            return ""
+        return _prompt_llm(prompt)
+
+
+class ParrotEvaluationFailureCase(EvaluationFailureCase):
+    phrase: str
+    response: str
+
+
+class ImproveParrotMutator(Mutator[ParrotOrganism, ParrotEvaluationFailureCase]):
+    IMPROVEMENT_PROMPT_TEMPLATE = """
+We want to build a prompt that causes an LLM to repeat back a given phrase verbatim.
+
+The current prompt template is:
+```
+{{ organism.prompt_template }}
+```
+
+Unfortunately, on this phrase:
+```
+{{ failure_case.phrase }}
+```
+the LLM responded with:
+```
+{{ failure_case.response }}
+```
+
+Diagnose what went wrong, then propose an improved prompt template. Put the new
+template in the LAST triple-backtick block of your response.
+""".strip()
+
+    def mutate(
+        self,
+        organism: ParrotOrganism,
+        failure_cases: list[ParrotEvaluationFailureCase],
+        learning_log_entries: list[LearningLogEntry],
+    ) -> list[ParrotOrganism]:
+        fc = failure_cases[0]
+        prompt = jinja2.Template(self.IMPROVEMENT_PROMPT_TEMPLATE).render(
+            organism=organism, failure_case=fc
+        )
+        try:
+            resp = _prompt_llm(prompt)
+            parts = resp.split("```")
+            if len(parts) < 3:
+                return []
+            new_tpl = parts[-2].strip()
+            return [ParrotOrganism(prompt_template=new_tpl)]
+        except Exception as e:
+            print(f"mutate error: {e}", file=sys.stderr)
+            return []
+
+
+class ParrotEvaluator(Evaluator[ParrotOrganism, EvaluationResult, ParrotEvaluationFailureCase]):
+    TRAINABLE_PHRASES = [
+        "Hello world.",
+        "bla",
+        "Bla",
+        "bla.",
+        '"bla bla".',
+        "Just say 'foo' once with no extra words.",
+    ]
+    HOLDOUT_PHRASES = [
+        "bla, but only once.",
+        "'bla'",
+    ]
+
+    def evaluate(self, organism: ParrotOrganism) -> EvaluationResult:
+        train_fails: list[ParrotEvaluationFailureCase] = []
+        hold_fails: list[ParrotEvaluationFailureCase] = []
+        for i, p in enumerate(self.TRAINABLE_PHRASES):
+            r = organism.run(p)
+            if r != p:
+                train_fails.append(ParrotEvaluationFailureCase(
+                    phrase=p, response=r, data_point_id=f"trainable_{i}"))
+        for i, p in enumerate(self.HOLDOUT_PHRASES):
+            r = organism.run(p)
+            if r != p:
+                hold_fails.append(ParrotEvaluationFailureCase(
+                    phrase=p, response=r, data_point_id=f"holdout_{i}"))
+        n_total = len(self.TRAINABLE_PHRASES) + len(self.HOLDOUT_PHRASES)
+        n_ok = n_total - len(train_fails) - len(hold_fails)
+        return EvaluationResult(
+            score=n_ok / n_total,
+            trainable_failure_cases=train_fails,
+            holdout_failure_cases=hold_fails,
+            # Always viable. Even a 0-score seed is a valid starting point; the
+            # mutator should still get a chance to fix it.
+            is_viable=True,
+        )
+
+
+def make_problem() -> Problem:
+    return Problem[ParrotOrganism, EvaluationResult, ParrotEvaluationFailureCase](
+        evaluator=ParrotEvaluator(),
+        mutators=[ImproveParrotMutator()],
+        initial_organism=ParrotOrganism(prompt_template="Say {{ phrase }}"),
+    )
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    register_hyperparameter_args(ap.add_argument_group("hyperparameters"))
+    ap.add_argument("--num_iterations", type=int, default=3)
+    ap.add_argument("--mutator_concurrency", type=int, default=4)
+    ap.add_argument("--evaluator_concurrency", type=int, default=4)
+    ap.add_argument("--output_dir", type=str, required=True)
+    args = ap.parse_args()
+
+    out = Path(args.output_dir)
+    out.mkdir(parents=True, exist_ok=True)
+
+    hp = build_hyperparameter_config_from_args(args)
+    loop = EvolveProblemLoop(
+        problem=make_problem(),
+        learning_log_view_type=parse_learning_log_view_type(hp.learning_log_view_type),
+        num_parents_per_iteration=hp.num_parents_per_iteration,
+        mutator_concurrency=args.mutator_concurrency,
+        evaluator_concurrency=args.evaluator_concurrency,
+        fixed_midpoint_score=hp.fixed_midpoint_score,
+        midpoint_score_percentile=hp.midpoint_score_percentile,
+        sharpness=hp.sharpness,
+        novelty_weight=hp.novelty_weight,
+        batch_size=hp.batch_size,
+        should_verify_mutations=hp.verify_mutations,
+    )
+
+    import json
+    log_path = out / "results.jsonl"
+    snap_dir = out / "snapshots"
+    snap_dir.mkdir(exist_ok=True)
+    print("Evaluating initial organism...")
+    for snap in loop.run(num_iterations=args.num_iterations):
+        (snap_dir / f"iteration_{snap.iteration}.pkl").write_bytes(snap.snapshot)
+        _, best_eval = snap.best_organism_result
+        print(f"iter={snap.iteration} pop={snap.population_size} "
+              f"best_score={best_eval.score:.3f}")
+        with log_path.open("a") as f:
+            f.write(json.dumps({
+                "iteration": snap.iteration,
+                "best_score": best_eval.score,
+                "pop_size": snap.population_size,
+                "score_percentiles": {str(k): v for k, v in snap.score_percentiles.items()},
+            }) + "\n")
+    print(f"\nDone. Results in: {out}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/optional-skills/research/darwinian-evolver/scripts/show_snapshot.py b/optional-skills/research/darwinian-evolver/scripts/show_snapshot.py
new file mode 100644
index 00000000000..10e3a03dca9
--- /dev/null
+++ b/optional-skills/research/darwinian-evolver/scripts/show_snapshot.py
@@ -0,0 +1,69 @@
+"""
+show_snapshot.py — Dump the population from a darwinian-evolver snapshot pickle.
+
+Usage:
+    python show_snapshot.py PATH/TO/iteration_N.pkl [--field prompt_template]
+
+The script is intentionally Organism-agnostic: it walks `org.__dict__` and prints
+all str fields. By default it shows `prompt_template` if present; pass --field to
+target a different attribute (e.g. `regex_pattern`, `sql_query`, `code_block`).
+"""
+from __future__ import annotations
+
+import argparse
+import pickle
+import sys
+from pathlib import Path
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("snapshot", type=Path)
+    ap.add_argument(
+        "--field",
+        default=None,
+        help="Organism attribute to display. Defaults to the first str field found.",
+    )
+    ap.add_argument("--top", type=int, default=None, help="Show only top N by score.")
+    args = ap.parse_args()
+
+    if not args.snapshot.exists():
+        sys.exit(f"snapshot not found: {args.snapshot}")
+
+    # The outer pickle wraps a dict; the inner pickle contains the actual organism
+    # objects, which must be importable under their original dotted path. If you
+    # ran a custom driver, make sure its module is on sys.path before calling this.
+    outer = pickle.loads(args.snapshot.read_bytes())
+    if not isinstance(outer, dict) or "population_snapshot" not in outer:
+        sys.exit("not a darwinian-evolver snapshot (no population_snapshot key)")
+    inner = pickle.loads(outer["population_snapshot"])
+    pairs = inner["organisms"]  # list of (Organism, EvaluationResult)
+
+    print(f"# organisms: {len(pairs)}\n")
+    ranked = sorted(pairs, key=lambda p: getattr(p[1], "score", 0) or 0, reverse=True)
+    if args.top:
+        ranked = ranked[: args.top]
+
+    for i, (org, res) in enumerate(ranked):
+        score = getattr(res, "score", float("nan"))
+        print(f"=== rank {i} score={score:.3f} ===")
+        # pick field
+        field = args.field
+        if field is None:
+            for k, v in vars(org).items():
+                if isinstance(v, str) and not k.startswith("_") and k not in ("id",):
+                    field = k
+                    break
+        val = getattr(org, field, None) if field else None
+        if val is None:
+            print(f"  (no string field; org fields: {list(vars(org).keys())})")
+        else:
+            print(f"  {field} ({len(val)} chars):")
+            for ln in val.splitlines()[:30]:
+                print(f"    {ln}")
+        print()
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/optional-skills/research/darwinian-evolver/templates/custom_problem_template.py b/optional-skills/research/darwinian-evolver/templates/custom_problem_template.py
new file mode 100644
index 00000000000..c6daac14ede
--- /dev/null
+++ b/optional-skills/research/darwinian-evolver/templates/custom_problem_template.py
@@ -0,0 +1,240 @@
+"""
+Template: a custom darwinian-evolver problem.
+
+Copy this file, fill in the THREE marked spots (Organism, Evaluator, Mutator),
+then run it as a driver script. The skeleton handles all the wiring so you only
+write the domain-specific logic.
+
+To run:
+    cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver
+    OPENROUTER_API_KEY=... uv run --with openai python /path/to/this_file.py \
+        --num_iterations 3 --num_parents_per_iteration 2 \
+        --output_dir /tmp/my_problem
+
+The pattern mirrors `scripts/parrot_openrouter.py` (the working reference).
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+from openai import OpenAI
+
+# Upstream types (AGPL — invoked via subprocess in production; importing here
+# is fine for skill-side driver scripts the user owns).
+from darwinian_evolver.cli_common import (
+    build_hyperparameter_config_from_args,
+    parse_learning_log_view_type,
+    register_hyperparameter_args,
+)
+from darwinian_evolver.evolve_problem_loop import EvolveProblemLoop
+from darwinian_evolver.learning_log import LearningLogEntry
+from darwinian_evolver.problem import (
+    EvaluationFailureCase,
+    EvaluationResult,
+    Evaluator,
+    Mutator,
+    Organism,
+    Problem,
+)
+
+DEFAULT_MODEL = os.environ.get("EVOLVER_MODEL", "openai/gpt-4o-mini")
+
+
+def _client() -> OpenAI:
+    key = os.environ.get("OPENROUTER_API_KEY")
+    if not key:
+        sys.exit("OPENROUTER_API_KEY is not set")
+    return OpenAI(api_key=key, base_url="https://openrouter.ai/api/v1")
+
+
+def _prompt_llm(prompt: str, max_tokens: int = 1024) -> str:
+    try:
+        r = _client().chat.completions.create(
+            model=DEFAULT_MODEL,
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        return r.choices[0].message.content or ""
+    except Exception as e:
+        # Never let one bad LLM response kill the run.
+        return f"<LLM_ERROR: {type(e).__name__}: {e}>"
+
+
+# ---------------------------------------------------------------------------
+# 1. ORGANISM — what you are evolving.
+# ---------------------------------------------------------------------------
+class MyOrganism(Organism):
+    # TODO: replace with your artifact field. Common shapes:
+    #   prompt_template: str
+    #   regex_pattern: str
+    #   sql_query: str
+    #   code_block: str
+    artifact: str
+
+    def run(self, *inputs) -> str:
+        """Exercise the organism on a test input. Return whatever your
+        evaluator wants to score."""
+        # TODO: implement. For prompt evolution this typically calls _prompt_llm
+        # with the artifact rendered against the input. For regex/SQL it would
+        # call `re.findall(self.artifact, input)` / execute SQL / etc.
+        raise NotImplementedError
+
+
+# ---------------------------------------------------------------------------
+# 2. EVALUATOR — score organisms and surface failures the mutator can learn from.
+# ---------------------------------------------------------------------------
+class MyFailureCase(EvaluationFailureCase):
+    # TODO: include enough context for the LLM to diagnose the failure.
+    input: str
+    expected: str
+    actual: str
+
+
+class MyEvaluator(Evaluator[MyOrganism, EvaluationResult, MyFailureCase]):
+    # Split your dataset. Mutator only sees trainable; holdout detects overfitting.
+    TRAINABLE = [
+        # TODO: list of (input, expected) tuples
+        # ("input1", "expected1"),
+    ]
+    HOLDOUT = [
+        # TODO: separate set the mutator never sees
+    ]
+
+    def evaluate(self, organism: MyOrganism) -> EvaluationResult:
+        train_fails: list[MyFailureCase] = []
+        hold_fails: list[MyFailureCase] = []
+        for i, (inp, expected) in enumerate(self.TRAINABLE):
+            actual = organism.run(inp)
+            if actual != expected:
+                train_fails.append(MyFailureCase(
+                    input=inp, expected=expected, actual=actual,
+                    data_point_id=f"trainable_{i}",
+                ))
+        for i, (inp, expected) in enumerate(self.HOLDOUT):
+            actual = organism.run(inp)
+            if actual != expected:
+                hold_fails.append(MyFailureCase(
+                    input=inp, expected=expected, actual=actual,
+                    data_point_id=f"holdout_{i}",
+                ))
+        n_total = len(self.TRAINABLE) + len(self.HOLDOUT)
+        n_ok = n_total - len(train_fails) - len(hold_fails)
+        return EvaluationResult(
+            score=n_ok / n_total if n_total else 0.0,
+            trainable_failure_cases=train_fails,
+            holdout_failure_cases=hold_fails,
+            # Always-viable. The evolver only blocks completely-broken organisms;
+            # a 0-score organism is fine and will simply be sampled less often.
+            is_viable=True,
+        )
+
+
+# ---------------------------------------------------------------------------
+# 3. MUTATOR — LLM proposes an improved organism from a failure case.
+# ---------------------------------------------------------------------------
+class MyMutator(Mutator[MyOrganism, MyFailureCase]):
+    PROMPT = """
+The current artifact is:
+```
+{artifact}
+```
+
+On this input:
+```
+{input}
+```
+it produced:
+```
+{actual}
+```
+but we wanted:
+```
+{expected}
+```
+
+Diagnose what went wrong, then propose an improved version of the artifact.
+Put the new version in the LAST triple-backtick block of your response.
+""".strip()
+
+    def mutate(
+        self,
+        organism: MyOrganism,
+        failure_cases: list[MyFailureCase],
+        learning_log_entries: list[LearningLogEntry],
+    ) -> list[MyOrganism]:
+        fc = failure_cases[0]
+        prompt = self.PROMPT.format(
+            artifact=organism.artifact,
+            input=fc.input,
+            actual=fc.actual,
+            expected=fc.expected,
+        )
+        resp = _prompt_llm(prompt)
+        parts = resp.split("```")
+        if len(parts) < 3:
+            return []
+        new_artifact = parts[-2].strip()
+        # Strip an opening language tag like "python\n" or "sql\n"
+        if "\n" in new_artifact:
+            first_line, rest = new_artifact.split("\n", 1)
+            if first_line and not first_line.startswith(" ") and len(first_line) < 20:
+                new_artifact = rest
+        return [MyOrganism(artifact=new_artifact)]
+
+
+# ---------------------------------------------------------------------------
+# Driver — fills in the EvolveProblemLoop boilerplate. You shouldn't need to
+# touch anything below this line for a typical run.
+# ---------------------------------------------------------------------------
+def make_problem() -> Problem:
+    initial = MyOrganism(artifact="TODO: starting artifact here")  # TODO
+    return Problem[MyOrganism, EvaluationResult, MyFailureCase](
+        evaluator=MyEvaluator(),
+        mutators=[MyMutator()],
+        initial_organism=initial,
+    )
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    register_hyperparameter_args(ap.add_argument_group("hyperparameters"))
+    ap.add_argument("--num_iterations", type=int, default=3)
+    ap.add_argument("--mutator_concurrency", type=int, default=2)
+    ap.add_argument("--evaluator_concurrency", type=int, default=2)
+    ap.add_argument("--output_dir", type=str, required=True)
+    args = ap.parse_args()
+
+    out = Path(args.output_dir)
+    out.mkdir(parents=True, exist_ok=True)
+    (out / "snapshots").mkdir(exist_ok=True)
+
+    hp = build_hyperparameter_config_from_args(args)
+    loop = EvolveProblemLoop(
+        problem=make_problem(),
+        learning_log_view_type=parse_learning_log_view_type(hp.learning_log_view_type),
+        num_parents_per_iteration=hp.num_parents_per_iteration,
+        mutator_concurrency=args.mutator_concurrency,
+        evaluator_concurrency=args.evaluator_concurrency,
+        fixed_midpoint_score=hp.fixed_midpoint_score,
+        midpoint_score_percentile=hp.midpoint_score_percentile,
+        sharpness=hp.sharpness,
+        novelty_weight=hp.novelty_weight,
+        batch_size=hp.batch_size,
+        should_verify_mutations=hp.verify_mutations,
+    )
+
+    print("Evaluating initial organism...")
+    for snap in loop.run(num_iterations=args.num_iterations):
+        (out / "snapshots" / f"iteration_{snap.iteration}.pkl").write_bytes(snap.snapshot)
+        _, best = snap.best_organism_result
+        print(f"iter={snap.iteration} pop={snap.population_size} best_score={best.score:.3f}")
+
+    print(f"\nDone. Results in: {out}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/skills/test_darwinian_evolver_skill.py b/tests/skills/test_darwinian_evolver_skill.py
new file mode 100644
index 00000000000..8b3a14b8da9
--- /dev/null
+++ b/tests/skills/test_darwinian_evolver_skill.py
@@ -0,0 +1,102 @@
+"""
+Smoke tests for the darwinian-evolver optional skill.
+
+We can't actually run the evolution loop in CI (it needs network + a paid LLM),
+so these tests verify:
+  - SKILL.md frontmatter conforms to the hardline format
+  - shipped scripts parse as valid Python
+  - the scripts reference the right env var / module paths
+"""
+from __future__ import annotations
+
+import ast
+import re
+from pathlib import Path
+
+import pytest
+import yaml
+
+SKILL_DIR = Path(__file__).resolve().parents[2] / "optional-skills" / "research" / "darwinian-evolver"
+
+
+@pytest.fixture(scope="module")
+def frontmatter() -> dict:
+    src = (SKILL_DIR / "SKILL.md").read_text()
+    m = re.search(r"^---\n(.*?)\n---", src, re.DOTALL)
+    assert m, "SKILL.md missing YAML frontmatter"
+    return yaml.safe_load(m.group(1))
+
+
+def test_skill_dir_exists() -> None:
+    assert SKILL_DIR.is_dir(), f"missing skill dir: {SKILL_DIR}"
+
+
+def test_skill_md_present() -> None:
+    assert (SKILL_DIR / "SKILL.md").is_file()
+
+
+def test_description_under_60_chars(frontmatter) -> None:
+    desc = frontmatter["description"]
+    assert len(desc) <= 60, f"description is {len(desc)} chars (hardline ≤60): {desc!r}"
+
+
+def test_name_matches_dir(frontmatter) -> None:
+    assert frontmatter["name"] == "darwinian-evolver"
+
+
+def test_platforms_excludes_windows(frontmatter) -> None:
+    # Upstream uses func_timeout (POSIX signals) and uv subprocess pipelines; the
+    # skill is gated [linux, macos]. If we ever port to Windows, update this test
+    # to assert ["linux", "macos", "windows"].
+    assert "windows" not in frontmatter["platforms"]
+    assert set(frontmatter["platforms"]) >= {"linux", "macos"}
+
+
+def test_author_credits_contributor(frontmatter) -> None:
+    author = frontmatter["author"]
+    assert "Bihruze" in author, f"author should credit the original contributor: {author!r}"
+
+
+def test_license_mit(frontmatter) -> None:
+    assert frontmatter["license"] == "MIT"
+
+
+@pytest.mark.parametrize(
+    "path",
+    [
+        "scripts/parrot_openrouter.py",
+        "scripts/show_snapshot.py",
+        "templates/custom_problem_template.py",
+    ],
+)
+def test_shipped_scripts_parse(path: str) -> None:
+    src = (SKILL_DIR / path).read_text()
+    ast.parse(src)  # raises SyntaxError on broken Python
+
+
+def test_parrot_script_uses_openrouter() -> None:
+    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text()
+    assert "OPENROUTER_API_KEY" in src, "parrot driver should read OPENROUTER_API_KEY"
+    assert "openrouter.ai/api/v1" in src, "parrot driver should target OpenRouter"
+    assert "EVOLVER_MODEL" in src, "model should be overridable via EVOLVER_MODEL"
+
+
+def test_parrot_script_has_error_swallowing() -> None:
+    """Provider content-filter / rate-limit must not kill the run — see Pitfall 2."""
+    src = (SKILL_DIR / "scripts" / "parrot_openrouter.py").read_text()
+    assert "LLM_ERROR" in src, "_prompt_llm should swallow provider errors and tag them"
+
+
+def test_skill_calls_out_agpl(frontmatter) -> None:
+    """The upstream tool is AGPL-3.0. The skill MUST flag this so users don't
+    import it into MIT-licensed code by accident."""
+    src = (SKILL_DIR / "SKILL.md").read_text()
+    assert "AGPL" in src, "SKILL.md must mention upstream AGPL license"
+
+
+def test_skill_pitfalls_section_present() -> None:
+    src = (SKILL_DIR / "SKILL.md").read_text()
+    assert "## Pitfalls" in src
+    # Pitfalls we discovered during the spike — keep them in sync with reality.
+    assert "Initial organism must be viable" in src
+    assert "generator" in src  # loop.run() pitfall

From 53637fb17d92b03ca3708f6df104136028459439 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 21:55:01 -0700
Subject: [PATCH 109/218] chore(skills/darwinian-evolver): AUTHOR_MAP + docs
 regen

---
 scripts/release.py                            |   1 +
 .../docs/reference/optional-skills-catalog.md |   1 +
 .../research/research-darwinian-evolver.md    | 217 ++++++++++++++++++
 website/sidebars.ts                           |   1 +
 4 files changed, 220 insertions(+)
 create mode 100644 website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md

diff --git a/scripts/release.py b/scripts/release.py
index 6084e0754c0..2e6bd6e6435 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -59,6 +59,7 @@ AUTHOR_MAP = {
     "m@mobrienv.dev": "mikeyobrien",
     "qiyin.zuo@pcitc.com": "qiyin-code",
     "mr.aashiz@gmail.com": "aashizpoudel",
+    "98262967+Bihruze@users.noreply.github.com": "Bihruze",
     "nidhi2894@gmail.com": "nidhi-singh02",
     "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
     "oleksii.lisikh@gmail.com": "olisikh",
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index d5839f846d1..fc447b7e01f 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -161,6 +161,7 @@ hermes skills uninstall <skill-name>
 | Skill | Description |
 |-------|-------------|
 | [**bioinformatics**](/docs/user-guide/skills/optional/research/research-bioinformatics) | Gateway to 400+ bioinformatics skills from bioSkills and ClawBio. Covers genomics, transcriptomics, single-cell, variant calling, pharmacogenomics, metagenomics, structural biology, and more. Fetches domain-specific reference material on... |
+| [**darwinian-evolver**](/docs/user-guide/skills/optional/research/research-darwinian-evolver) | Evolve prompts/regex/SQL/code with Imbue's evolution loop. |
 | [**domain-intel**](/docs/user-guide/skills/optional/research/research-domain-intel) | Passive domain reconnaissance using Python stdlib. Subdomain discovery, SSL certificate inspection, WHOIS lookups, DNS records, domain availability checks, and bulk multi-domain analysis. No API keys required. |
 | [**drug-discovery**](/docs/user-guide/skills/optional/research/research-drug-discovery) | Pharmaceutical research assistant for drug discovery workflows. Search bioactive compounds on ChEMBL, calculate drug-likeness (Lipinski Ro5, QED, TPSA, synthetic accessibility), look up drug-drug interactions via OpenFDA, interpret ADMET... |
 | [**duckduckgo-search**](/docs/user-guide/skills/optional/research/research-duckduckgo-search) | Free web search via DuckDuckGo — text, news, images, videos. No API key needed. Prefer the `ddgs` CLI when installed; use the Python DDGS library only after verifying that `ddgs` is available in the current runtime. |
diff --git a/website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md b/website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md
new file mode 100644
index 00000000000..121b2dde160
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md
@@ -0,0 +1,217 @@
+---
+title: "Darwinian Evolver — Evolve prompts/regex/SQL/code with Imbue's evolution loop"
+sidebar_label: "Darwinian Evolver"
+description: "Evolve prompts/regex/SQL/code with Imbue's evolution loop"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Darwinian Evolver
+
+Evolve prompts/regex/SQL/code with Imbue's evolution loop.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/research/darwinian-evolver` |
+| Path | `optional-skills/research/darwinian-evolver` |
+| Version | `0.1.0` |
+| Author | Bihruze (Asahi0x), Hermes Agent |
+| License | MIT |
+| Platforms | linux, macos |
+| Tags | `evolution`, `optimization`, `prompt-engineering`, `research` |
+| Related skills | [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv), [`jupyter-live-kernel`](/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Darwinian Evolver
+
+Run Imbue's [darwinian_evolver](https://github.com/imbue-ai/darwinian_evolver) — an
+LLM-driven evolutionary search loop — to optimize a **prompt, regex, SQL query,
+or small code snippet** against a fitness function.
+
+Status: thin wrapper around the upstream tool. The skill installs it, walks the
+agent through writing a `Problem` definition (organism + evaluator + mutator),
+and drives the loop via the upstream CLI or a small custom Python driver.
+
+**License:** the upstream tool is **AGPL-3.0**. The skill ONLY ever invokes it
+via the upstream CLI or a `subprocess`/`uv run` call (mere aggregation). Do NOT
+import upstream classes into Hermes itself.
+
+## When to Use
+
+- User says "optimize this prompt", "evolve a regex for X", "auto-improve this
+  code/SQL", "search for a better instruction".
+- You have a scorer (exact match, regex pass-rate, unit test, LLM-judge, runtime
+  metric) AND a starting candidate (organism). If you don't have a scorer, stop
+  and define one first — that's the hard part.
+- Cost is OK: a typical run is 50–500 LLM calls. On gpt-4o-mini that's pennies;
+  on Claude Sonnet it can be a few dollars.
+
+Do **not** use this when:
+- The optimization target is differentiable (use gradient descent / DSPy).
+- You only need to try 2–3 variants — just write them by hand.
+- The fitness signal is purely subjective with no measurable criterion.
+
+## Prerequisites
+
+- Python ≥3.11
+- `git`, `uv` (or `pip`)
+- One of: `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, or `OPENAI_API_KEY`
+
+The skill ships a small `parrot_openrouter.py` driver that uses `OPENROUTER_API_KEY`
+via the OpenAI SDK, so any model on OpenRouter works. The upstream CLI itself
+hardcodes Anthropic and needs `ANTHROPIC_API_KEY`.
+
+## Install (One-Time)
+
+Run via the `terminal` tool:
+
+```bash
+mkdir -p ~/.hermes/cache/darwinian-evolver && cd ~/.hermes/cache/darwinian-evolver
+[ -d darwinian_evolver ] || git clone --depth 1 https://github.com/imbue-ai/darwinian_evolver.git
+cd darwinian_evolver && uv sync
+```
+
+Verify:
+
+```bash
+cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver \
+  && uv run darwinian_evolver --help | head -5
+```
+
+## Quick Start — The Built-In Parrot Example
+
+Tiny smoke test (requires `ANTHROPIC_API_KEY`):
+
+```bash
+cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver
+uv run darwinian_evolver parrot \
+  --num_iterations 2 \
+  --num_parents_per_iteration 2 \
+  --mutator_concurrency 2 --evaluator_concurrency 2 \
+  --output_dir /tmp/parrot_demo
+```
+
+Outputs:
+- `/tmp/parrot_demo/snapshots/iteration_N.pkl` — pickled population per iteration
+- `/tmp/parrot_demo/<jsonl>` — per-iteration JSON log (path printed at end)
+
+Open `~/.hermes/cache/darwinian-evolver/darwinian_evolver/darwinian_evolver/lineage_visualizer.html`
+in a browser and load the JSON log to see the evolutionary tree.
+
+## Quick Start — OpenRouter Driver (No Anthropic Key)
+
+The skill ships `scripts/parrot_openrouter.py` — same parrot problem, but the
+LLM call goes through OpenRouter so any provider works.
+
+```bash
+# From wherever the skill is installed:
+SKILL_DIR=~/.hermes/skills/research/darwinian-evolver
+DE_DIR=~/.hermes/cache/darwinian-evolver/darwinian_evolver
+
+cd "$DE_DIR" && \
+  EVOLVER_MODEL='openai/gpt-4o-mini' \
+  uv run --with openai python "$SKILL_DIR/scripts/parrot_openrouter.py" \
+    --num_iterations 3 --num_parents_per_iteration 2 \
+    --output_dir /tmp/parrot_or
+```
+
+Inspect the result with `scripts/show_snapshot.py`:
+
+```bash
+uv run --with openai python "$SKILL_DIR/scripts/show_snapshot.py" \
+  /tmp/parrot_or/snapshots/iteration_3.pkl
+```
+
+Expected output: 7 evolved prompt templates ranked by score, with the best
+landing around 0.6–0.8 (the seed `Say {{ phrase }}` scored 0.000).
+
+## Defining a Custom Problem
+
+The skill ships `templates/custom_problem_template.py` — copy, edit, run.
+Three things you must define:
+
+1. **`Organism`** — a Pydantic `BaseModel` subclass holding the artifact being
+   evolved (`prompt_template: str`, `regex_pattern: str`, `sql_query: str`,
+   `code_block: str`, etc.). Add a `run(*args)` method that exercises it.
+
+2. **`Evaluator`** — `.evaluate(organism) -> EvaluationResult(score=..., trainable_failure_cases=[...], holdout_failure_cases=[...], is_viable=True)`.
+   - **`score`** is in `[0, 1]`. Higher is better.
+   - **`trainable_failure_cases`** — what the mutator sees. Include enough
+     context (input, expected, actual) for the LLM to diagnose.
+   - **`holdout_failure_cases`** — kept out of the mutator's view. Use these
+     to detect overfitting.
+   - **`is_viable=True`** unless the organism is completely broken (raises,
+     returns None, etc.). A 0-score viable organism is fine — it just gets
+     down-weighted in parent selection.
+
+3. **`Mutator`** — `.mutate(organism, failure_cases, learning_log_entries) -> list[Organism]`.
+   Typically: build an LLM prompt that includes the current organism + a
+   failure case + an ask to propose a fix; parse the LLM's response; return
+   a new `Organism`. Return `[]` on parse failure — the loop handles it.
+
+Then write a driver script that wires `Problem(initial_organism, evaluator, [mutators])`
+into `EvolveProblemLoop` and iterates over `loop.run(num_iterations=N)` — the
+shipped `scripts/parrot_openrouter.py` is the reference.
+
+## Hyperparameters That Actually Matter
+
+| flag | default | when to change |
+|---|---|---|
+| `--num_iterations` | 5 | bump to 10–20 once you trust the evaluator |
+| `--num_parents_per_iteration` | 4 | drop to 2 for cheap exploration |
+| `--mutator_concurrency` | 10 | drop to 2–4 to avoid rate limits |
+| `--evaluator_concurrency` | 10 | same; evaluator hits the LLM too |
+| `--batch_size` | 1 | raise to 3–5 once your mutator handles multiple failures |
+| `--verify_mutations` | off | turn on once mutator is wasteful (>10× cost saving on later runs per Imbue) |
+| `--midpoint_score` | `p75` | leave alone unless scores cluster |
+| `--sharpness` | 10 | leave alone |
+
+## Pitfalls
+
+1. **`Initial organism must be viable`** — set `is_viable=True` in your
+   `EvaluationResult` even on a 0-score seed. The loop refuses non-viable
+   organisms because they imply the loop has nothing to evolve from.
+2. **Provider content filters kill runs.** Azure-backed OpenRouter models
+   reject phrases like "ignore previous instructions" with HTTP 400. Wrap
+   the LLM call in `try/except` and return `f"<LLM_ERROR: {e}>"` — the
+   evolver will just score that organism 0 and move on.
+3. **`loop.run()` is a generator** — calling it doesn't run anything until
+   you iterate. Use `for snap in loop.run(num_iterations=N):`.
+4. **Snapshots are nested pickles.** `iteration_N.pkl` contains a dict with
+   `population_snapshot` (more pickled bytes). To unpickle you must have the
+   `Organism` class importable under the same dotted path it was pickled at.
+5. **Concurrency defaults are aggressive.** 10/10 will hit rate limits on
+   most providers. Start with 2/2.
+6. **CLI is hardcoded to Anthropic.** `uv run darwinian_evolver <problem>`
+   reaches for `ANTHROPIC_API_KEY` and uses Claude Sonnet. To use any other
+   provider, write a driver like `parrot_openrouter.py`.
+7. **AGPL.** Never `from darwinian_evolver import ...` inside Hermes core.
+   Custom driver scripts under `~/.hermes/skills/...` are user-side and fine.
+8. **No PyPI package.** `pip install darwinian-evolver` will pull the wrong
+   thing. Always install from the GitHub repo.
+
+## Verification
+
+After install + a parrot run, exit code 0 from this is sufficient:
+
+```bash
+DE_DIR=~/.hermes/cache/darwinian-evolver/darwinian_evolver
+ls "$DE_DIR/darwinian_evolver/lineage_visualizer.html" >/dev/null && \
+cd "$DE_DIR" && uv run darwinian_evolver --help >/dev/null && \
+echo "darwinian-evolver: OK"
+```
+
+## References
+
+- [Imbue research post](https://imbue.com/research/2026-02-27-darwinian-evolver/)
+- [ARC-AGI-2 results](https://imbue.com/research/2026-02-27-arc-agi-2-evolution/)
+- [imbue-ai/darwinian_evolver](https://github.com/imbue-ai/darwinian_evolver) (AGPL-3.0)
+- [Darwin Gödel Machines](https://arxiv.org/abs/2505.22954)
+- [PromptBreeder](https://arxiv.org/abs/2309.16797)
diff --git a/website/sidebars.ts b/website/sidebars.ts
index 52ed452d046..3bce8dfc5c9 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -547,6 +547,7 @@ const sidebars: SidebarsConfig = {
                   collapsed: true,
                   items: [
                     'user-guide/skills/optional/research/research-bioinformatics',
+                    'user-guide/skills/optional/research/research-darwinian-evolver',
                     'user-guide/skills/optional/research/research-domain-intel',
                     'user-guide/skills/optional/research/research-drug-discovery',
                     'user-guide/skills/optional/research/research-duckduckgo-search',

From 9c304a7f569ebf17efe120d5b61a3a745c6dc532 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Fri, 15 May 2026 22:33:16 -0600
Subject: [PATCH 110/218] fix(agent): retry malformed anthropic stream parser
 errors

---
 run_agent.py                      | 57 ++++++++++++++++++---
 tests/run_agent/test_streaming.py | 83 ++++++++++++++++++++++++++++++-
 2 files changed, 131 insertions(+), 9 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index b3cde9eb1ea..88d5c95fcd8 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3027,6 +3027,24 @@ class AIAgent:
             parts.append(f"{type(e).__name__}({msg})" if msg else type(e).__name__)
         return " <- ".join(parts) if parts else type(error).__name__
 
+    def _is_provider_stream_parse_error(self, error: BaseException) -> bool:
+        """Return True for malformed provider streaming data from SDK parsers.
+
+        Some Anthropic-compatible streaming providers can send a malformed
+        event-stream frame.  The Anthropic SDK surfaces that as a plain
+        ``ValueError`` such as ``expected ident at line 1 column 149``.  That
+        is provider wire-format trouble, not local request validation, so it
+        should follow the same retry path as a truncated JSON body.
+        """
+        if getattr(self, "api_mode", None) != "anthropic_messages":
+            return False
+        if not isinstance(error, ValueError):
+            return False
+        if isinstance(error, (UnicodeEncodeError, json.JSONDecodeError)):
+            return False
+        message = str(error).strip().lower()
+        return "expected ident at line" in message
+
     def _log_stream_retry(
         self,
         *,
@@ -5080,6 +5098,12 @@ class AIAgent:
         """
         raw = str(error)
 
+        if (
+            isinstance(error, ValueError)
+            and "expected ident at line" in raw.lower()
+        ):
+            return f"Malformed provider streaming response: {raw[:300]}"
+
         # Cloudflare / proxy HTML pages: grab the <title> for a clean summary
         if "<!DOCTYPE" in raw or "<html" in raw:
             m = re.search(r"<title[^>]*>([^<]+)</title>", raw, re.IGNORECASE)
@@ -8528,6 +8552,7 @@ class AIAgent:
                         _is_conn_err = isinstance(
                             e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
                         )
+                        _is_stream_parse_err = self._is_provider_stream_parse_error(e)
 
                         # If the stream died AFTER some tokens were delivered:
                         # normally we don't retry (the user already saw text,
@@ -8567,7 +8592,10 @@ class AIAgent:
                                         for phrase in _SSE_PREVIEW_PHRASES
                                     )
                             _is_transient = (
-                                _is_timeout or _is_conn_err or _is_sse_conn_err_preview
+                                _is_timeout
+                                or _is_conn_err
+                                or _is_sse_conn_err_preview
+                                or _is_stream_parse_err
                             )
                             _can_silent_retry = (
                                 _partial_tool_in_flight
@@ -8665,7 +8693,7 @@ class AIAgent:
                                     for phrase in _SSE_CONN_PHRASES
                                 )
 
-                        if _is_timeout or _is_conn_err or _is_sse_conn_err:
+                        if _is_timeout or _is_conn_err or _is_sse_conn_err or _is_stream_parse_err:
                             # Transient network / timeout error. Retry the
                             # streaming request with a fresh connection first.
                             if _stream_attempt < _max_stream_retries:
@@ -8706,12 +8734,20 @@ class AIAgent:
                                 mid_tool_call=False,
                                 diag=request_client_holder.get("diag"),
                             )
-                            self._emit_status(
-                                "❌ Connection to provider failed after "
-                                f"{_max_stream_retries + 1} attempts. "
-                                "The provider may be experiencing issues — "
-                                "try again in a moment."
-                            )
+                            if _is_stream_parse_err:
+                                self._emit_status(
+                                    "❌ Provider returned malformed streaming data after "
+                                    f"{_max_stream_retries + 1} attempts. "
+                                    "The provider may be experiencing issues — "
+                                    "try again in a moment."
+                                )
+                            else:
+                                self._emit_status(
+                                    "❌ Connection to provider failed after "
+                                    f"{_max_stream_retries + 1} attempts. "
+                                    "The provider may be experiencing issues — "
+                                    "try again in a moment."
+                                )
                         else:
                             _err_lower = str(e).lower()
                             _is_stream_unsupported = (
@@ -14509,11 +14545,16 @@ class AIAgent:
                     # provider/network failure (malformed response body,
                     # truncated stream, routing layer corruption), not a
                     # local programming bug, and should be retried (#14782).
+                    # Exclude Anthropic stream parser ValueErrors for the
+                    # same reason: third-party Anthropic-compatible providers
+                    # can emit malformed event-stream frames that SDK parsers
+                    # raise as plain ValueError.
                     is_local_validation_error = (
                         isinstance(api_error, (ValueError, TypeError))
                         and not isinstance(
                             api_error, (UnicodeEncodeError, json.JSONDecodeError)
                         )
+                        and not self._is_provider_stream_parse_error(api_error)
                         # ssl.SSLError (and its subclass SSLCertVerificationError)
                         # inherits from OSError *and* ValueError via Python MRO,
                         # so the isinstance(ValueError) check above would
diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py
index e636498c462..1ce140f82bf 100644
--- a/tests/run_agent/test_streaming.py
+++ b/tests/run_agent/test_streaming.py
@@ -999,6 +999,88 @@ class TestAnthropicStreamCallbacks:
 
         assert touch_calls.count("receiving stream response") == len(events)
 
+    @patch("run_agent.AIAgent._replace_primary_openai_client")
+    def test_anthropic_stream_parser_valueerror_retries_before_delivery(
+        self, mock_replace, monkeypatch,
+    ):
+        """Malformed Anthropic event-stream frames retry instead of surfacing HTTP None."""
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://api.minimax.io/anthropic",
+            provider="minimax",
+            model="MiniMax-M2.7",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "anthropic_messages"
+        agent._interrupt_requested = False
+        monkeypatch.setenv("HERMES_STREAM_RETRIES", "1")
+
+        class _BadStream:
+            response = None
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, *_args):
+                return False
+
+            def __iter__(self):
+                raise ValueError("expected ident at line 1 column 149")
+
+        final_message = SimpleNamespace(content=[], stop_reason="end_turn")
+        good_stream = MagicMock()
+        good_stream.__enter__ = MagicMock(return_value=good_stream)
+        good_stream.__exit__ = MagicMock(return_value=False)
+        good_stream.__iter__ = MagicMock(return_value=iter([]))
+        good_stream.get_final_message.return_value = final_message
+
+        agent._anthropic_client = MagicMock()
+        agent._anthropic_client.messages.stream.side_effect = [
+            _BadStream(),
+            good_stream,
+        ]
+
+        response = agent._interruptible_streaming_api_call({})
+
+        assert response is final_message
+        assert agent._anthropic_client.messages.stream.call_count == 2
+        assert mock_replace.call_count == 1
+
+    @patch("run_agent.AIAgent._replace_primary_openai_client")
+    def test_generic_anthropic_valueerror_still_propagates_without_stream_retry(
+        self, mock_replace, monkeypatch,
+    ):
+        """Only known provider stream parser ValueErrors are treated as transient."""
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://api.minimax.io/anthropic",
+            provider="minimax",
+            model="MiniMax-M2.7",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "anthropic_messages"
+        agent._interrupt_requested = False
+        monkeypatch.setenv("HERMES_STREAM_RETRIES", "1")
+
+        agent._anthropic_client = MagicMock()
+        agent._anthropic_client.messages.stream.side_effect = ValueError(
+            "invalid local request shape"
+        )
+
+        with pytest.raises(ValueError, match="invalid local request shape"):
+            agent._interruptible_streaming_api_call({})
+
+        assert agent._anthropic_client.messages.stream.call_count == 1
+        assert mock_replace.call_count == 0
+
 
 class TestPartialToolCallWarning:
     """Regression: when a stream dies mid tool-call argument generation after
@@ -1504,4 +1586,3 @@ class TestCopilotACPStreamingDecision:
             _use_streaming = False
 
         assert _use_streaming is True
-

From 016c772e7fcf3acca54e7c87e7c5a22541adb5d0 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 22:12:57 -0700
Subject: [PATCH 111/218] feat(plugins): tool override flag for replacing
 built-in tools (closes #11049) (#26759)

Plugins can now replace a built-in tool by passing override=True to
ctx.register_tool(). Without it, the registry rejects any registration
that would shadow an existing tool from a different toolset (unchanged
default behavior).

Unlocks the use case from #11049: drop-in replacement of browser/web
backends without forking core. Composes with the existing pre_tool_call
hook for runtime interception of any implementation.

The override is audit-logged at INFO so it surfaces in agent.log.
---
 hermes_cli/plugins.py                        |  15 ++-
 tests/hermes_cli/test_plugins.py             | 123 +++++++++++++++++++
 tools/registry.py                            |  23 +++-
 website/docs/guides/build-a-hermes-plugin.md |  24 ++++
 4 files changed, 180 insertions(+), 5 deletions(-)

diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py
index 9e9af0e0644..d0bbee6ce63 100644
--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@@ -325,8 +325,15 @@ class PluginContext:
         is_async: bool = False,
         description: str = "",
         emoji: str = "",
+        override: bool = False,
     ) -> None:
-        """Register a tool in the global registry **and** track it as plugin-provided."""
+        """Register a tool in the global registry **and** track it as plugin-provided.
+
+        Pass ``override=True`` to replace an existing built-in tool with the
+        same name (e.g. swap the default ``browser_navigate`` for a custom
+        CDP-backed implementation). Without it, attempting to register a name
+        already claimed by a different toolset is rejected.
+        """
         from tools.registry import registry
 
         registry.register(
@@ -339,9 +346,13 @@ class PluginContext:
             is_async=is_async,
             description=description,
             emoji=emoji,
+            override=override,
         )
         self._manager._plugin_tool_names.add(name)
-        logger.debug("Plugin %s registered tool: %s", self.manifest.name, name)
+        logger.debug(
+            "Plugin %s registered tool: %s%s",
+            self.manifest.name, name, " (override)" if override else "",
+        )
 
     # -- message injection --------------------------------------------------
 
diff --git a/tests/hermes_cli/test_plugins.py b/tests/hermes_cli/test_plugins.py
index 7be43a236f2..0c500297a2b 100644
--- a/tests/hermes_cli/test_plugins.py
+++ b/tests/hermes_cli/test_plugins.py
@@ -662,6 +662,129 @@ class TestPluginContext:
         from tools.registry import registry
         assert "plugin_echo" in registry._tools
 
+    def test_register_tool_rejects_shadow_without_override(self, tmp_path, monkeypatch, caplog):
+        """Without override=True, registering a tool name claimed by a different toolset is rejected."""
+        from tools.registry import registry
+
+        # Seed an existing entry from a non-plugin toolset.
+        registry.register(
+            name="shadow_target",
+            toolset="terminal",
+            schema={"name": "shadow_target", "description": "Built-in", "parameters": {"type": "object", "properties": {}}},
+            handler=lambda args, **kw: "built-in",
+        )
+        original_handler = registry._tools["shadow_target"].handler
+        try:
+            plugins_dir = tmp_path / "hermes_test" / "plugins"
+            plugin_dir = plugins_dir / "shadow_plugin"
+            plugin_dir.mkdir(parents=True)
+            (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "shadow_plugin"}))
+            (plugin_dir / "__init__.py").write_text(
+                'def register(ctx):\n'
+                '    ctx.register_tool(\n'
+                '        name="shadow_target",\n'
+                '        toolset="plugin_shadow_plugin",\n'
+                '        schema={"name": "shadow_target", "description": "Plugin", "parameters": {"type": "object", "properties": {}}},\n'
+                '        handler=lambda args, **kw: "plugin",\n'
+                '    )\n'
+            )
+            hermes_home = tmp_path / "hermes_test"
+            (hermes_home / "config.yaml").write_text(
+                yaml.safe_dump({"plugins": {"enabled": ["shadow_plugin"]}})
+            )
+            monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+            with caplog.at_level(logging.ERROR, logger="tools.registry"):
+                mgr = PluginManager()
+                mgr.discover_and_load()
+
+            # Original handler must still be in place — registration was rejected.
+            assert registry._tools["shadow_target"].handler is original_handler
+            assert registry._tools["shadow_target"].toolset == "terminal"
+            # And an ERROR was logged explaining why and how to opt in.
+            assert any("override=True" in r.message for r in caplog.records)
+        finally:
+            registry.deregister("shadow_target")
+
+    def test_register_tool_override_replaces_existing(self, tmp_path, monkeypatch, caplog):
+        """override=True lets a plugin replace an existing built-in tool."""
+        from tools.registry import registry
+
+        registry.register(
+            name="override_target",
+            toolset="terminal",
+            schema={"name": "override_target", "description": "Built-in", "parameters": {"type": "object", "properties": {}}},
+            handler=lambda args, **kw: "built-in",
+        )
+        try:
+            plugins_dir = tmp_path / "hermes_test" / "plugins"
+            plugin_dir = plugins_dir / "override_plugin"
+            plugin_dir.mkdir(parents=True)
+            (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "override_plugin"}))
+            (plugin_dir / "__init__.py").write_text(
+                'def register(ctx):\n'
+                '    ctx.register_tool(\n'
+                '        name="override_target",\n'
+                '        toolset="plugin_override_plugin",\n'
+                '        schema={"name": "override_target", "description": "Plugin", "parameters": {"type": "object", "properties": {}}},\n'
+                '        handler=lambda args, **kw: "plugin",\n'
+                '        override=True,\n'
+                '    )\n'
+            )
+            hermes_home = tmp_path / "hermes_test"
+            (hermes_home / "config.yaml").write_text(
+                yaml.safe_dump({"plugins": {"enabled": ["override_plugin"]}})
+            )
+            monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+            with caplog.at_level(logging.INFO, logger="tools.registry"):
+                mgr = PluginManager()
+                mgr.discover_and_load()
+
+            # Plugin handler replaced the built-in one.
+            assert registry._tools["override_target"].toolset == "plugin_override_plugin"
+            assert registry._tools["override_target"].handler({}, ) == "plugin"
+            # Override is audit-logged at INFO.
+            assert any(
+                "overriding existing" in r.message and "override_target" in r.message
+                for r in caplog.records
+            )
+            # Plugin tracks it.
+            assert "override_target" in mgr._plugin_tool_names
+        finally:
+            registry.deregister("override_target")
+
+    def test_register_tool_override_on_new_name_is_noop_path(self, tmp_path, monkeypatch):
+        """override=True on a brand-new name still registers cleanly (no existing entry to replace)."""
+        from tools.registry import registry
+
+        plugins_dir = tmp_path / "hermes_test" / "plugins"
+        plugin_dir = plugins_dir / "new_override_plugin"
+        plugin_dir.mkdir(parents=True)
+        (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "new_override_plugin"}))
+        (plugin_dir / "__init__.py").write_text(
+            'def register(ctx):\n'
+            '    ctx.register_tool(\n'
+            '        name="brand_new_override_tool",\n'
+            '        toolset="plugin_new_override_plugin",\n'
+            '        schema={"name": "brand_new_override_tool", "description": "New", "parameters": {"type": "object", "properties": {}}},\n'
+            '        handler=lambda args, **kw: "ok",\n'
+            '        override=True,\n'
+            '    )\n'
+        )
+        hermes_home = tmp_path / "hermes_test"
+        (hermes_home / "config.yaml").write_text(
+            yaml.safe_dump({"plugins": {"enabled": ["new_override_plugin"]}})
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        try:
+            mgr = PluginManager()
+            mgr.discover_and_load()
+            assert "brand_new_override_tool" in registry._tools
+        finally:
+            registry.deregister("brand_new_override_tool")
+
 
 # ── TestPluginToolVisibility ───────────────────────────────────────────────
 
diff --git a/tools/registry.py b/tools/registry.py
index 9cac53084bd..2639eac74ed 100644
--- a/tools/registry.py
+++ b/tools/registry.py
@@ -244,8 +244,16 @@ class ToolRegistry:
         emoji: str = "",
         max_result_size_chars: int | float | None = None,
         dynamic_schema_overrides: Callable = None,
+        override: bool = False,
     ):
-        """Register a tool.  Called at module-import time by each tool file."""
+        """Register a tool.  Called at module-import time by each tool file.
+
+        ``override=True`` is an explicit opt-in for plugins that intend to
+        replace an existing built-in tool implementation (e.g. swap the
+        default browser tool for a headed-Chrome CDP backend). Without it,
+        registrations that would shadow an existing tool from a different
+        toolset are rejected to prevent accidental overwrites.
+        """
         with self._lock:
             existing = self._tools.get(name)
             if existing and existing.toolset != toolset:
@@ -260,13 +268,22 @@ class ToolRegistry:
                         "Tool '%s': MCP toolset '%s' overwriting MCP toolset '%s'",
                         name, toolset, existing.toolset,
                     )
+                elif override:
+                    # Explicit plugin opt-in: replace the existing tool.
+                    # Logged at INFO so the override is auditable in agent.log.
+                    logger.info(
+                        "Tool '%s': toolset '%s' overriding existing toolset '%s' "
+                        "(override=True opt-in)",
+                        name, toolset, existing.toolset,
+                    )
                 else:
                     # Reject shadowing — prevent plugins/MCP from overwriting
                     # built-in tools or vice versa.
                     logger.error(
                         "Tool registration REJECTED: '%s' (toolset '%s') would "
-                        "shadow existing tool from toolset '%s'. Deregister the "
-                        "existing tool first if this is intentional.",
+                        "shadow existing tool from toolset '%s'. Pass "
+                        "override=True to register() if the replacement is "
+                        "intentional, or deregister the existing tool first.",
                         name, toolset, existing.toolset,
                     )
                     return
diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md
index ee74e23ac5e..3135c68daaf 100644
--- a/website/docs/guides/build-a-hermes-plugin.md
+++ b/website/docs/guides/build-a-hermes-plugin.md
@@ -465,6 +465,30 @@ ctx.register_tool(
 )
 ```
 
+### Overriding a built-in tool
+
+To replace a built-in tool with your own implementation (e.g. swap the
+default browser tool for a headed-Chrome CDP backend, or replace
+`web_search` with a custom corporate index), pass `override=True`:
+
+```python
+def register(ctx):
+    ctx.register_tool(
+        name="browser_navigate",             # same name as the built-in
+        toolset="plugin_my_browser",         # your own toolset namespace
+        schema={...},
+        handler=my_custom_navigate,
+        override=True,                       # explicit opt-in
+    )
+```
+
+Without `override=True`, the registry rejects any registration that would
+shadow an existing tool from a different toolset — this prevents
+accidental overwrites. The override is logged at INFO level so it's
+auditable in `~/.hermes/logs/agent.log`. Plugins load after built-in
+tools, so the registration order is correct: your handler replaces the
+built-in one.
+
 ### Register multiple hooks
 
 ```python

From afb97dbc539d1b6cc812d5af2bb8e9b3ebfc4719 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 21:49:31 -0700
Subject: [PATCH 112/218] docs: add Programmatic Integration overview (closes
 #360)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document the three protocols already available for driving hermes-agent
from external programs — ACP, the TUI gateway JSON-RPC, and the
OpenAI-compatible API server — with a 'which one should I use' guide and
a Pi-style RPC command mapping table. Sidebar entry under Developer
Guide -> Architecture.
---
 .../programmatic-integration.md               | 126 ++++++++++++++++++
 website/sidebars.ts                           |   1 +
 2 files changed, 127 insertions(+)
 create mode 100644 website/docs/developer-guide/programmatic-integration.md

diff --git a/website/docs/developer-guide/programmatic-integration.md b/website/docs/developer-guide/programmatic-integration.md
new file mode 100644
index 00000000000..1ad0b13ef91
--- /dev/null
+++ b/website/docs/developer-guide/programmatic-integration.md
@@ -0,0 +1,126 @@
+---
+sidebar_position: 8
+title: "Programmatic Integration"
+description: "Three protocols for driving hermes-agent from external programs: ACP, the TUI gateway JSON-RPC, and the OpenAI-compatible HTTP API"
+---
+
+# Programmatic Integration
+
+Hermes ships three protocols for driving the agent from external programs — IDE plugins, custom UIs, CI pipelines, embedded sub-agents. Pick the one that matches your transport and consumer.
+
+| Protocol | Transport | Best for | Defined by |
+|----------|-----------|----------|------------|
+| **ACP** | JSON-RPC over stdio | IDE clients (VS Code, Zed, JetBrains) that already speak the [Agent Client Protocol](https://github.com/zed-industries/agent-client-protocol) | `acp_adapter/` |
+| **TUI gateway** | JSON-RPC over stdio (or WebSocket) | Custom hosts that want fine-grained control of sessions, slash commands, approvals, and streaming events | `tui_gateway/server.py` |
+| **API server** | HTTP + Server-Sent Events | OpenAI-compatible frontends (Open WebUI, LobeChat, LibreChat…) and language-agnostic web clients | `gateway/platforms/api_server.py` |
+
+All three drive the same `AIAgent` core. They differ only in wire format and which set of features they expose.
+
+---
+
+## ACP (Agent Client Protocol)
+
+`hermes acp` starts a stdio JSON-RPC server speaking ACP. Used in production by VS Code (Zed Industries' ACP extension), Zed, and any JetBrains IDE with an ACP plugin.
+
+Capabilities exposed: session creation, prompt submission, streaming agent message chunks, tool-call events, permission requests, session fork, cancel, and authentication. Tool output is rendered into ACP `Diff`/`ToolCall` content blocks the IDE understands.
+
+Full lifecycle, event bridge, and approval flow: [ACP Internals](./acp-internals).
+
+```bash
+hermes acp                  # serve ACP on stdio
+hermes acp --bootstrap      # print install snippet for an ACP-capable IDE
+```
+
+---
+
+## TUI Gateway JSON-RPC
+
+`tui_gateway/server.py` is the protocol the Ink TUI (`hermes --tui`) and the embedded dashboard PTY bridge talk to. Any external host can speak the same protocol over stdio (or WebSocket via `tui_gateway/ws.py`).
+
+### Method catalog (selected)
+
+```
+prompt.submit           prompt.background       session.steer
+session.create          session.list            session.interrupt
+session.history         session.compress        session.branch
+session.title           session.usage           session.status
+clarify.respond         sudo.respond            secret.respond
+approval.respond        config.set / config.get commands.catalog
+command.resolve         command.dispatch        cli.exec
+reload.mcp              reload.env              process.stop
+delegation.status       subagent.interrupt      spawn_tree.save / list / load
+terminal.resize         clipboard.paste         image.attach
+```
+
+### Events streamed back
+
+`message.delta`, `message.complete`, `tool.start`, `tool.progress`, `tool.complete`, `approval.request`, `clarify.request`, `sudo.request`, `secret.request`, `gateway.ready`, plus session lifecycle and error events.
+
+### Pi-style RPC mapping
+
+Every command in the Pi-mono RPC spec ([issue #360](https://github.com/NousResearch/hermes-agent/issues/360)) has a TUI-gateway equivalent:
+
+| Pi command | Hermes equivalent |
+|------------|-------------------|
+| `prompt` | `prompt.submit` (or ACP `session/prompt`) |
+| `steer` | `session.steer` |
+| `follow_up` | `prompt.submit` queued after current turn |
+| `abort` | `session.interrupt` |
+| `set_model` | `command.dispatch` for `/model <provider:model>` (mid-session, persistent) |
+| `compact` | `session.compress` |
+| `get_state` | `session.status` |
+| `get_messages` | `session.history` |
+| `switch_session` | `session.resume` |
+| `fork` | `session.branch` |
+| `ui_request` / `ui_response` | `clarify.respond` / `sudo.respond` / `secret.respond` / `approval.respond` |
+
+---
+
+## OpenAI-Compatible API Server
+
+`gateway/platforms/api_server.py` exposes hermes over HTTP for any client that already speaks the OpenAI format. Useful when you want a web frontend, a curl-driven CI runner, or a non-Python consumer.
+
+Endpoints:
+
+```
+POST /v1/chat/completions        OpenAI Chat Completions (streaming via SSE)
+POST /v1/responses               OpenAI Responses API (stateful)
+POST /v1/runs                    Start a run, returns run_id (202)
+GET  /v1/runs/{id}               Run status
+GET  /v1/runs/{id}/events        SSE stream of lifecycle events
+POST /v1/runs/{id}/approval      Resolve a pending approval
+POST /v1/runs/{id}/stop          Interrupt the run
+GET  /v1/capabilities            Machine-readable feature flags
+GET  /v1/models                  Lists hermes-agent
+GET  /health, /health/detailed
+```
+
+Setup, headers (`X-Hermes-Session-Id`, `X-Hermes-Session-Key`), and frontend wiring: [API Server](../user-guide/features/api-server).
+
+---
+
+## Which one should I use?
+
+- **You're writing an IDE plugin and the IDE already speaks ACP** → ACP. Zero protocol work on the IDE side.
+- **You're writing a custom desktop / web / TUI host and want every Hermes feature** (slash commands, approvals, clarify, multi-agent, session branching) → TUI gateway JSON-RPC.
+- **You want any OpenAI-compatible frontend, a language-agnostic HTTP client, or curl-driven automation** → API server.
+- **You want a Python in-process embed without a subprocess** → import `run_agent.AIAgent` directly. See [Agent Loop](./agent-loop).
+
+---
+
+## Model hot-swapping
+
+Mid-session model switching works on every surface — it's the `/model` slash command under the hood.
+
+- **CLI / TUI:** `/model claude-sonnet-4` or `/model openrouter:anthropic/claude-sonnet-4.6`
+- **TUI gateway RPC:** `command.dispatch` with `{"command": "/model claude-sonnet-4"}`
+- **ACP:** the IDE sends the slash command as a prompt; the agent dispatches it
+- **API server:** include a `model` field in the request body or set `X-Hermes-Model`
+
+Provider-aware resolution (the same model name picks the right format for whatever provider you're on) is built in. See `hermes_cli/model_switch.py`.
+
+---
+
+## A note on `--mode rpc`
+
+Hermes does not have a `--mode rpc` flag. The three protocols above already cover the use cases — ACP for IDE-protocol clients, the TUI gateway for stdio JSON-RPC hosts, and the API server for HTTP. If you find a real gap that none of them fill, open an issue with the concrete consumer you're building.
diff --git a/website/sidebars.ts b/website/sidebars.ts
index 3bce8dfc5c9..e8bc7ccb13e 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -690,6 +690,7 @@ const sidebars: SidebarsConfig = {
             'developer-guide/gateway-internals',
             'developer-guide/session-storage',
             'developer-guide/provider-runtime',
+            'developer-guide/programmatic-integration',
           ],
         },
         {

From 559c6ad94aee03ddbd28b9480b9dabac292213a2 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Fri, 15 May 2026 22:14:47 -0700
Subject: [PATCH 113/218] feat(skills): add optional pinggy-tunnel skill

Zero-install localhost tunnels over SSH via Pinggy. Covers HTTP/HTTPS,
TCP, TLS, access control (basic auth / bearer / IP whitelist), header
manipulation (CORS, force-HTTPS), web debugger, Pro token mode, and four
composite recipes (webhook receiver, MCP server exposure, local LLM
endpoint share, dev-server quick-share with one-shot password).

Closes #361
---
 optional-skills/devops/pinggy-tunnel/SKILL.md | 309 +++++++++++++++++
 .../docs/reference/optional-skills-catalog.md |   1 +
 .../optional/devops/devops-pinggy-tunnel.md   | 327 ++++++++++++++++++
 website/sidebars.ts                           |   1 +
 4 files changed, 638 insertions(+)
 create mode 100644 optional-skills/devops/pinggy-tunnel/SKILL.md
 create mode 100644 website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md

diff --git a/optional-skills/devops/pinggy-tunnel/SKILL.md b/optional-skills/devops/pinggy-tunnel/SKILL.md
new file mode 100644
index 00000000000..fa9f1d5b67b
--- /dev/null
+++ b/optional-skills/devops/pinggy-tunnel/SKILL.md
@@ -0,0 +1,309 @@
+---
+name: pinggy-tunnel
+description: Zero-install localhost tunnels over SSH via Pinggy.
+version: 0.1.0
+author: Teknium (teknium1), Hermes Agent
+license: MIT
+platforms: [linux, macos, windows]
+metadata:
+  hermes:
+    tags: [Pinggy, Tunnel, Networking, SSH, Webhook, Localhost]
+    related_skills: [cloudflared-quick-tunnel, webhook-subscriptions]
+---
+
+# Pinggy Tunnel Skill
+
+Expose a local service (dev server, webhook receiver, MCP endpoint, demo) to the public internet using a Pinggy SSH reverse tunnel. No daemon to install — the user's stock SSH client connects to `a.pinggy.io:443` and Pinggy hands back a public HTTP/HTTPS URL.
+
+Free tier: 60-minute tunnels, random subdomain, no signup. Pro tier ($3/mo) is an opt-in with a token.
+
+## When to Use
+
+- User asks to "expose this locally", "share my dev server", "make this URL public", "tunnel port N", "get a public URL for a webhook"
+- Need to receive a webhook callback during a local task (Stripe, GitHub, Discord, AgentMail)
+- Sharing a one-off HTTP demo (MCP server, Ollama/vLLM endpoint, dashboard) with a remote party
+- The host has SSH but no `cloudflared` / `ngrok` binary, and installing one would be overkill
+
+If the host already has `cloudflared` configured, prefer the `cloudflared-quick-tunnel` skill — Cloudflare quick tunnels don't expire after 60 minutes.
+
+## Prerequisites
+
+- `ssh` on PATH (`ssh -V`). Default on Linux, macOS, and Windows 10+. No other install.
+- A local service listening on `127.0.0.1:<port>` before the tunnel starts. Pinggy will return URLs but they'll 502 until the local origin is up.
+
+Optional:
+
+- `PINGGY_TOKEN` env var for paid Pro features (persistent subdomain, custom domain, multiple tunnels, no 60-minute cap). Free tier needs no credentials.
+
+## Quick Reference
+
+```bash
+# Plain HTTP/HTTPS tunnel for port 8000 (free tier)
+ssh -p 443 -o StrictHostKeyChecking=no -o ServerAliveInterval=30 \
+    -R0:localhost:8000 free@a.pinggy.io
+
+# TCP tunnel (databases, raw SSH, etc.)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:5432 tcp@a.pinggy.io
+
+# TLS tunnel (Pinggy can't decrypt — bring your own certs at origin)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:443 tls@a.pinggy.io
+
+# Basic auth gate (b:user:pass)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "b:admin:secret+free@a.pinggy.io"
+
+# Bearer token gate (k:token)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "k:mysecrettoken+free@a.pinggy.io"
+
+# IP whitelist (w:CIDR)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "w:203.0.113.0/24+free@a.pinggy.io"
+
+# Enable CORS + force HTTPS redirect
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "co+x:https+free@a.pinggy.io"
+
+# Pro tier (persistent URL, no 60-min cap)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 "$PINGGY_TOKEN+a.pinggy.io"
+```
+
+## Procedure — Start a Tunnel and Get the URL
+
+The model SHOULD use the `terminal` tool. The tunnel must stay alive for the duration of the share, so run it as a background process and parse the public URL from stdout.
+
+### 1. Confirm a local origin is up
+
+```bash
+curl -sI http://127.0.0.1:8000/ | head -1
+# expect HTTP/1.x 200 (or any non-connection-refused response)
+```
+
+If nothing is listening yet, start it first (e.g. `python3 -m http.server 8000 --bind 127.0.0.1`). Pinggy will happily return a URL pointed at nothing — the user will see 502 until the origin comes up.
+
+### 2. Launch the tunnel as a background process
+
+Use `terminal(background=True)` and capture output to a logfile (Pinggy prints the URLs on stdout, then keeps the connection open):
+
+```bash
+LOG=/tmp/pinggy-8000.log
+nohup ssh -p 443 \
+    -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -o ServerAliveCountMax=3 \
+    -R0:localhost:8000 free@a.pinggy.io \
+    > "$LOG" 2>&1 &
+echo $! > /tmp/pinggy-8000.pid
+```
+
+`StrictHostKeyChecking=no` + `UserKnownHostsFile=/dev/null` skips the first-run host-key prompt. `ServerAliveInterval=30` keeps the SSH session from getting torn down by an idle NAT.
+
+### 3. Parse the URL out of the log
+
+```bash
+sleep 4
+grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/pinggy-8000.log | head -1
+```
+
+Expected output looks like:
+
+```
+You are not authenticated.
+Your tunnel will expire in 60 minutes.
+http://yqycl-98-162-69-48.a.free.pinggy.link
+https://yqycl-98-162-69-48.a.free.pinggy.link
+```
+
+Hand the `https://...pinggy.link` URL to the user.
+
+### 4. Verify
+
+```bash
+curl -sI https://<the-url>/ | head -3
+# expect 200/302/whatever the local origin actually returns
+```
+
+If you get `502 Bad Gateway`, the SSH session is up but the local origin isn't listening — fix step 1 first.
+
+### 5. Teardown
+
+```bash
+kill "$(cat /tmp/pinggy-8000.pid)"
+# or, if the pid file got lost:
+pkill -f 'ssh -p 443 .* free@a\.pinggy\.io'
+```
+
+If you have a session_id from `terminal(background=True)`, prefer `process(action='kill', session_id=...)`.
+
+## Access Control via Username Keywords
+
+Pinggy stacks control flags into the SSH username separated by `+`. Always quote the whole `user@host` argument when it contains a `+`:
+
+| Keyword | Effect |
+|---------|--------|
+| `b:user:pass` | HTTP Basic auth gate |
+| `k:token` | Bearer-token header gate (`Authorization: Bearer <token>`) |
+| `w:CIDR` | IP whitelist (single IP or CIDR, repeatable) |
+| `co` | Add `Access-Control-Allow-Origin: *` (CORS) |
+| `x:https` | Force HTTPS — auto-redirect HTTP to HTTPS |
+| `a:Name:Value` | Add request header |
+| `u:Name:Value` | Update request header |
+| `r:Name` | Remove request header |
+| `qr` | Print a QR code of the URL to stdout (handy for mobile sharing) |
+
+Combine freely: `"b:admin:secret+co+x:https+free@a.pinggy.io"`.
+
+## Web Debugger (optional)
+
+Pinggy can mirror the inbound traffic to `localhost:4300` for inspection. Add a local forward to the SSH command:
+
+```bash
+ssh -p 443 -L4300:localhost:4300 -R0:localhost:8000 free@a.pinggy.io
+```
+
+Then open `http://localhost:4300` in a browser to see live request/response pairs.
+
+## Pitfalls
+
+- **60-minute hard cap on the free tier.** The SSH session terminates at the 60-minute mark; the URL goes dead. For longer shares, either use `PINGGY_TOKEN` (Pro) or auto-restart with a shell loop (note that the URL changes on every restart for free-tier).
+- **Free-tier URL is random and changes on restart.** Don't bookmark it, don't paste it into a config file. Re-parse from the log each time.
+- **Concurrent free tunnels are limited to one per source IP.** Starting a second tunnel from the same machine usually kills the first. Pro tier lifts this.
+- **`+` in usernames must be quoted.** Bare `ssh ... b:admin:secret+free@a.pinggy.io` works in bash but breaks under shells that treat `+` specially or when assembled programmatically. Always wrap in double quotes.
+- **Don't tunnel anything sensitive without an access-control flag.** A bare HTTP tunnel is reachable by anyone with the URL. Use `b:`, `k:`, or `w:` for non-public services.
+- **`process(action='log')` may miss SSH banner output.** Pinggy prints the URLs and then the SSH session goes interactive. Always redirect to a logfile and `grep` the file directly — same pattern as `cloudflared-quick-tunnel`.
+- **Host-key prompt on first run.** Default OpenSSH config asks the user to accept Pinggy's host key. Always pass `-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null` for unattended runs.
+- **TCP and TLS tunnels return a `<subdomain>.a.pinggy.online:<port>` pair, not an https URL.** Parse with a different regex (`tcp://` and a port). Don't assume every Pinggy tunnel is HTTP.
+- **Pro mode requires the token as the username, not a flag.** Use `"$PINGGY_TOKEN+a.pinggy.io"` (no `free@`). With a token you can also add `:persistent` for a stable subdomain — see `pinggy.io/docs/`.
+
+## Recipes
+
+Composite patterns combining a local origin with a Pinggy tunnel. Each recipe is self-contained — start the origin, start the tunnel, parse the URL, hand it back to the user.
+
+### Recipe 1 — Receive a webhook callback
+
+Use this when an external service (Stripe, GitHub, Discord, AgentMail, etc.) needs to POST to a publicly reachable URL during a local task.
+
+```bash
+# 1. Tiny capturing server: every request gets appended to /tmp/webhook-hits.log
+cat >/tmp/webhook-server.py <<'PY'
+import http.server, json, datetime, pathlib
+LOG = pathlib.Path("/tmp/webhook-hits.log")
+class H(http.server.BaseHTTPRequestHandler):
+    def _capture(self):
+        n = int(self.headers.get("content-length") or 0)
+        body = self.rfile.read(n).decode("utf-8", "replace") if n else ""
+        rec = {"t": datetime.datetime.utcnow().isoformat(), "path": self.path,
+               "method": self.command, "headers": dict(self.headers), "body": body}
+        with LOG.open("a") as f: f.write(json.dumps(rec) + "\n")
+        self.send_response(200); self.send_header("content-type","application/json")
+        self.end_headers(); self.wfile.write(b'{"ok":true}\n')
+    def do_GET(self): self._capture()
+    def do_POST(self): self._capture()
+    def log_message(self,*a,**k): pass
+http.server.HTTPServer(("127.0.0.1", 18080), H).serve_forever()
+PY
+nohup python3 /tmp/webhook-server.py >/tmp/webhook-server.log 2>&1 &
+echo $! >/tmp/webhook-server.pid
+
+# 2. Tunnel — bearer-token-gate so randos can't pollute the capture log
+nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:18080 "k:$(openssl rand -hex 12)+free@a.pinggy.io" \
+    >/tmp/webhook-pinggy.log 2>&1 &
+echo $! >/tmp/webhook-pinggy.pid
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/webhook-pinggy.log | head -1)
+echo "Webhook URL: $URL"
+
+# 3. While the agent works, watch hits land
+tail -f /tmp/webhook-hits.log
+```
+
+Hand `$URL` to the service that needs to call you. Teardown: `kill $(cat /tmp/webhook-server.pid) $(cat /tmp/webhook-pinggy.pid)`.
+
+### Recipe 2 — Expose an MCP server over HTTP/SSE
+
+Use when a remote MCP client (Claude Desktop on another machine, a teammate's editor, etc.) needs to reach an MCP server running on the local box. Only works for MCP servers that speak HTTP transport — stdio-mode servers can't be tunneled.
+
+```bash
+# 1. Start the MCP server in HTTP mode (example: a FastMCP server on port 8765)
+nohup python3 my_mcp_server.py --transport http --port 8765 \
+    >/tmp/mcp-server.log 2>&1 &
+echo $! >/tmp/mcp-server.pid
+
+# 2. Tunnel with a bearer token — MCP traffic should not be open to the internet
+TOKEN=$(openssl rand -hex 16)
+nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:8765 "k:$TOKEN+free@a.pinggy.io" \
+    >/tmp/mcp-pinggy.log 2>&1 &
+echo $! >/tmp/mcp-pinggy.pid
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/mcp-pinggy.log | head -1)
+echo "MCP URL: $URL"
+echo "Bearer token: $TOKEN"
+```
+
+The remote client connects to `$URL` with `Authorization: Bearer $TOKEN`. Hermes' own native MCP client config: `{"transport": "http", "url": "<URL>", "headers": {"Authorization": "Bearer <TOKEN>"}}`.
+
+### Recipe 3 — Expose a local LLM endpoint (Ollama / vLLM / llama.cpp)
+
+Share a local model with a remote caller (another agent, a phone, a teammate). Ollama listens on `:11434`, vLLM and llama.cpp typically on `:8000`.
+
+```bash
+# Pre-req: the model server is already running on 127.0.0.1:11434 (Ollama default)
+TOKEN=$(openssl rand -hex 16)
+nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:11434 "k:$TOKEN+co+free@a.pinggy.io" \
+    >/tmp/llm-pinggy.log 2>&1 &
+echo $! >/tmp/llm-pinggy.pid
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/llm-pinggy.log | head -1)
+echo "Endpoint: $URL"
+echo "Token:    $TOKEN"
+
+# Verify
+curl -s "$URL/api/tags" -H "Authorization: Bearer $TOKEN" | head
+```
+
+`co` enables CORS so a browser caller can hit the endpoint. Drop `co` for backend-only callers. For an OpenAI-compatible vLLM/llama.cpp endpoint, callers use base URL `$URL/v1` with `Authorization: Bearer $TOKEN` — but note Pinggy strips/replaces nothing in the body, so the model server itself sees Pinggy's token; the local server should be configured to ignore auth (it's already on `127.0.0.1`) and let Pinggy do the gating.
+
+### Recipe 4 — Share a dev server with a one-shot password
+
+The fastest "let a teammate poke at my running app" pattern. Random password, prints once, dies when you Ctrl-C.
+
+```bash
+PASS=$(openssl rand -base64 12 | tr -d '+/=' | head -c 12)
+echo "Dev server password: $PASS"
+ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:3000 "b:dev:$PASS+co+x:https+free@a.pinggy.io"
+# URL prints to the terminal. Share URL + password. Ctrl-C to tear down.
+```
+
+`b:dev:$PASS` gates the URL with HTTP Basic auth. `x:https` forces TLS. `co` adds CORS for SPA frontends.
+
+## Verification
+
+```bash
+# End-to-end: spin up a trivial origin, tunnel it, hit it, tear down
+python3 -m http.server 18000 --bind 127.0.0.1 >/tmp/origin.log 2>&1 &
+ORIGIN_PID=$!
+
+nohup ssh -p 443 \
+    -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null \
+    -R0:localhost:18000 free@a.pinggy.io >/tmp/pinggy-verify.log 2>&1 &
+SSH_PID=$!
+
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/pinggy-verify.log | head -1)
+echo "URL: $URL"
+curl -sI "$URL/" | head -1
+
+kill "$SSH_PID" "$ORIGIN_PID"
+```
+
+Expected: a `pinggy.link` URL and `HTTP/2 200` on the curl head.
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index fc447b7e01f..d1544ce89b9 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -64,6 +64,7 @@ hermes skills uninstall <skill-name>
 |-------|-------------|
 | [**inference-sh-cli**](/docs/user-guide/skills/optional/devops/devops-cli) | Run 150+ AI apps via inference.sh CLI (infsh) — image generation, video creation, LLMs, search, 3D, social automation. Uses the terminal tool. Triggers: inference.sh, infsh, ai apps, flux, veo, image generation, video generation, seedrea... |
 | [**docker-management**](/docs/user-guide/skills/optional/devops/devops-docker-management) | Manage Docker containers, images, volumes, networks, and Compose stacks — lifecycle ops, debugging, cleanup, and Dockerfile optimization. |
+| [**pinggy-tunnel**](/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel) | Zero-install localhost tunnels over SSH via Pinggy. |
 | [**watchers**](/docs/user-guide/skills/optional/devops/devops-watchers) | Poll RSS, JSON APIs, and GitHub with watermark dedup. |
 
 ## dogfood
diff --git a/website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md b/website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md
new file mode 100644
index 00000000000..19f431f1967
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md
@@ -0,0 +1,327 @@
+---
+title: "Pinggy Tunnel — Zero-install localhost tunnels over SSH via Pinggy"
+sidebar_label: "Pinggy Tunnel"
+description: "Zero-install localhost tunnels over SSH via Pinggy"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Pinggy Tunnel
+
+Zero-install localhost tunnels over SSH via Pinggy.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/devops/pinggy-tunnel` |
+| Path | `optional-skills/devops/pinggy-tunnel` |
+| Version | `0.1.0` |
+| Author | Teknium (teknium1), Hermes Agent |
+| License | MIT |
+| Platforms | linux, macos, windows |
+| Tags | `Pinggy`, `Tunnel`, `Networking`, `SSH`, `Webhook`, `Localhost` |
+| Related skills | `cloudflared-quick-tunnel`, [`webhook-subscriptions`](/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Pinggy Tunnel Skill
+
+Expose a local service (dev server, webhook receiver, MCP endpoint, demo) to the public internet using a Pinggy SSH reverse tunnel. No daemon to install — the user's stock SSH client connects to `a.pinggy.io:443` and Pinggy hands back a public HTTP/HTTPS URL.
+
+Free tier: 60-minute tunnels, random subdomain, no signup. Pro tier ($3/mo) is an opt-in with a token.
+
+## When to Use
+
+- User asks to "expose this locally", "share my dev server", "make this URL public", "tunnel port N", "get a public URL for a webhook"
+- Need to receive a webhook callback during a local task (Stripe, GitHub, Discord, AgentMail)
+- Sharing a one-off HTTP demo (MCP server, Ollama/vLLM endpoint, dashboard) with a remote party
+- The host has SSH but no `cloudflared` / `ngrok` binary, and installing one would be overkill
+
+If the host already has `cloudflared` configured, prefer the `cloudflared-quick-tunnel` skill — Cloudflare quick tunnels don't expire after 60 minutes.
+
+## Prerequisites
+
+- `ssh` on PATH (`ssh -V`). Default on Linux, macOS, and Windows 10+. No other install.
+- A local service listening on `127.0.0.1:<port>` before the tunnel starts. Pinggy will return URLs but they'll 502 until the local origin is up.
+
+Optional:
+
+- `PINGGY_TOKEN` env var for paid Pro features (persistent subdomain, custom domain, multiple tunnels, no 60-minute cap). Free tier needs no credentials.
+
+## Quick Reference
+
+```bash
+# Plain HTTP/HTTPS tunnel for port 8000 (free tier)
+ssh -p 443 -o StrictHostKeyChecking=no -o ServerAliveInterval=30 \
+    -R0:localhost:8000 free@a.pinggy.io
+
+# TCP tunnel (databases, raw SSH, etc.)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:5432 tcp@a.pinggy.io
+
+# TLS tunnel (Pinggy can't decrypt — bring your own certs at origin)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:443 tls@a.pinggy.io
+
+# Basic auth gate (b:user:pass)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "b:admin:secret+free@a.pinggy.io"
+
+# Bearer token gate (k:token)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "k:mysecrettoken+free@a.pinggy.io"
+
+# IP whitelist (w:CIDR)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "w:203.0.113.0/24+free@a.pinggy.io"
+
+# Enable CORS + force HTTPS redirect
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
+    "co+x:https+free@a.pinggy.io"
+
+# Pro tier (persistent URL, no 60-min cap)
+ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 "$PINGGY_TOKEN+a.pinggy.io"
+```
+
+## Procedure — Start a Tunnel and Get the URL
+
+The model SHOULD use the `terminal` tool. The tunnel must stay alive for the duration of the share, so run it as a background process and parse the public URL from stdout.
+
+### 1. Confirm a local origin is up
+
+```bash
+curl -sI http://127.0.0.1:8000/ | head -1
+# expect HTTP/1.x 200 (or any non-connection-refused response)
+```
+
+If nothing is listening yet, start it first (e.g. `python3 -m http.server 8000 --bind 127.0.0.1`). Pinggy will happily return a URL pointed at nothing — the user will see 502 until the origin comes up.
+
+### 2. Launch the tunnel as a background process
+
+Use `terminal(background=True)` and capture output to a logfile (Pinggy prints the URLs on stdout, then keeps the connection open):
+
+```bash
+LOG=/tmp/pinggy-8000.log
+nohup ssh -p 443 \
+    -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -o ServerAliveCountMax=3 \
+    -R0:localhost:8000 free@a.pinggy.io \
+    > "$LOG" 2>&1 &
+echo $! > /tmp/pinggy-8000.pid
+```
+
+`StrictHostKeyChecking=no` + `UserKnownHostsFile=/dev/null` skips the first-run host-key prompt. `ServerAliveInterval=30` keeps the SSH session from getting torn down by an idle NAT.
+
+### 3. Parse the URL out of the log
+
+```bash
+sleep 4
+grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/pinggy-8000.log | head -1
+```
+
+Expected output looks like:
+
+```
+You are not authenticated.
+Your tunnel will expire in 60 minutes.
+http://yqycl-98-162-69-48.a.free.pinggy.link
+https://yqycl-98-162-69-48.a.free.pinggy.link
+```
+
+Hand the `https://...pinggy.link` URL to the user.
+
+### 4. Verify
+
+```bash
+curl -sI https://<the-url>/ | head -3
+# expect 200/302/whatever the local origin actually returns
+```
+
+If you get `502 Bad Gateway`, the SSH session is up but the local origin isn't listening — fix step 1 first.
+
+### 5. Teardown
+
+```bash
+kill "$(cat /tmp/pinggy-8000.pid)"
+# or, if the pid file got lost:
+pkill -f 'ssh -p 443 .* free@a\.pinggy\.io'
+```
+
+If you have a session_id from `terminal(background=True)`, prefer `process(action='kill', session_id=...)`.
+
+## Access Control via Username Keywords
+
+Pinggy stacks control flags into the SSH username separated by `+`. Always quote the whole `user@host` argument when it contains a `+`:
+
+| Keyword | Effect |
+|---------|--------|
+| `b:user:pass` | HTTP Basic auth gate |
+| `k:token` | Bearer-token header gate (`Authorization: Bearer <token>`) |
+| `w:CIDR` | IP whitelist (single IP or CIDR, repeatable) |
+| `co` | Add `Access-Control-Allow-Origin: *` (CORS) |
+| `x:https` | Force HTTPS — auto-redirect HTTP to HTTPS |
+| `a:Name:Value` | Add request header |
+| `u:Name:Value` | Update request header |
+| `r:Name` | Remove request header |
+| `qr` | Print a QR code of the URL to stdout (handy for mobile sharing) |
+
+Combine freely: `"b:admin:secret+co+x:https+free@a.pinggy.io"`.
+
+## Web Debugger (optional)
+
+Pinggy can mirror the inbound traffic to `localhost:4300` for inspection. Add a local forward to the SSH command:
+
+```bash
+ssh -p 443 -L4300:localhost:4300 -R0:localhost:8000 free@a.pinggy.io
+```
+
+Then open `http://localhost:4300` in a browser to see live request/response pairs.
+
+## Pitfalls
+
+- **60-minute hard cap on the free tier.** The SSH session terminates at the 60-minute mark; the URL goes dead. For longer shares, either use `PINGGY_TOKEN` (Pro) or auto-restart with a shell loop (note that the URL changes on every restart for free-tier).
+- **Free-tier URL is random and changes on restart.** Don't bookmark it, don't paste it into a config file. Re-parse from the log each time.
+- **Concurrent free tunnels are limited to one per source IP.** Starting a second tunnel from the same machine usually kills the first. Pro tier lifts this.
+- **`+` in usernames must be quoted.** Bare `ssh ... b:admin:secret+free@a.pinggy.io` works in bash but breaks under shells that treat `+` specially or when assembled programmatically. Always wrap in double quotes.
+- **Don't tunnel anything sensitive without an access-control flag.** A bare HTTP tunnel is reachable by anyone with the URL. Use `b:`, `k:`, or `w:` for non-public services.
+- **`process(action='log')` may miss SSH banner output.** Pinggy prints the URLs and then the SSH session goes interactive. Always redirect to a logfile and `grep` the file directly — same pattern as `cloudflared-quick-tunnel`.
+- **Host-key prompt on first run.** Default OpenSSH config asks the user to accept Pinggy's host key. Always pass `-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null` for unattended runs.
+- **TCP and TLS tunnels return a `<subdomain>.a.pinggy.online:<port>` pair, not an https URL.** Parse with a different regex (`tcp://` and a port). Don't assume every Pinggy tunnel is HTTP.
+- **Pro mode requires the token as the username, not a flag.** Use `"$PINGGY_TOKEN+a.pinggy.io"` (no `free@`). With a token you can also add `:persistent` for a stable subdomain — see `pinggy.io/docs/`.
+
+## Recipes
+
+Composite patterns combining a local origin with a Pinggy tunnel. Each recipe is self-contained — start the origin, start the tunnel, parse the URL, hand it back to the user.
+
+### Recipe 1 — Receive a webhook callback
+
+Use this when an external service (Stripe, GitHub, Discord, AgentMail, etc.) needs to POST to a publicly reachable URL during a local task.
+
+```bash
+# 1. Tiny capturing server: every request gets appended to /tmp/webhook-hits.log
+cat >/tmp/webhook-server.py <<'PY'
+import http.server, json, datetime, pathlib
+LOG = pathlib.Path("/tmp/webhook-hits.log")
+class H(http.server.BaseHTTPRequestHandler):
+    def _capture(self):
+        n = int(self.headers.get("content-length") or 0)
+        body = self.rfile.read(n).decode("utf-8", "replace") if n else ""
+        rec = {"t": datetime.datetime.utcnow().isoformat(), "path": self.path,
+               "method": self.command, "headers": dict(self.headers), "body": body}
+        with LOG.open("a") as f: f.write(json.dumps(rec) + "\n")
+        self.send_response(200); self.send_header("content-type","application/json")
+        self.end_headers(); self.wfile.write(b'{"ok":true}\n')
+    def do_GET(self): self._capture()
+    def do_POST(self): self._capture()
+    def log_message(self,*a,**k): pass
+http.server.HTTPServer(("127.0.0.1", 18080), H).serve_forever()
+PY
+nohup python3 /tmp/webhook-server.py >/tmp/webhook-server.log 2>&1 &
+echo $! >/tmp/webhook-server.pid
+
+# 2. Tunnel — bearer-token-gate so randos can't pollute the capture log
+nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:18080 "k:$(openssl rand -hex 12)+free@a.pinggy.io" \
+    >/tmp/webhook-pinggy.log 2>&1 &
+echo $! >/tmp/webhook-pinggy.pid
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/webhook-pinggy.log | head -1)
+echo "Webhook URL: $URL"
+
+# 3. While the agent works, watch hits land
+tail -f /tmp/webhook-hits.log
+```
+
+Hand `$URL` to the service that needs to call you. Teardown: `kill $(cat /tmp/webhook-server.pid) $(cat /tmp/webhook-pinggy.pid)`.
+
+### Recipe 2 — Expose an MCP server over HTTP/SSE
+
+Use when a remote MCP client (Claude Desktop on another machine, a teammate's editor, etc.) needs to reach an MCP server running on the local box. Only works for MCP servers that speak HTTP transport — stdio-mode servers can't be tunneled.
+
+```bash
+# 1. Start the MCP server in HTTP mode (example: a FastMCP server on port 8765)
+nohup python3 my_mcp_server.py --transport http --port 8765 \
+    >/tmp/mcp-server.log 2>&1 &
+echo $! >/tmp/mcp-server.pid
+
+# 2. Tunnel with a bearer token — MCP traffic should not be open to the internet
+TOKEN=$(openssl rand -hex 16)
+nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:8765 "k:$TOKEN+free@a.pinggy.io" \
+    >/tmp/mcp-pinggy.log 2>&1 &
+echo $! >/tmp/mcp-pinggy.pid
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/mcp-pinggy.log | head -1)
+echo "MCP URL: $URL"
+echo "Bearer token: $TOKEN"
+```
+
+The remote client connects to `$URL` with `Authorization: Bearer $TOKEN`. Hermes' own native MCP client config: `{"transport": "http", "url": "<URL>", "headers": {"Authorization": "Bearer <TOKEN>"}}`.
+
+### Recipe 3 — Expose a local LLM endpoint (Ollama / vLLM / llama.cpp)
+
+Share a local model with a remote caller (another agent, a phone, a teammate). Ollama listens on `:11434`, vLLM and llama.cpp typically on `:8000`.
+
+```bash
+# Pre-req: the model server is already running on 127.0.0.1:11434 (Ollama default)
+TOKEN=$(openssl rand -hex 16)
+nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:11434 "k:$TOKEN+co+free@a.pinggy.io" \
+    >/tmp/llm-pinggy.log 2>&1 &
+echo $! >/tmp/llm-pinggy.pid
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/llm-pinggy.log | head -1)
+echo "Endpoint: $URL"
+echo "Token:    $TOKEN"
+
+# Verify
+curl -s "$URL/api/tags" -H "Authorization: Bearer $TOKEN" | head
+```
+
+`co` enables CORS so a browser caller can hit the endpoint. Drop `co` for backend-only callers. For an OpenAI-compatible vLLM/llama.cpp endpoint, callers use base URL `$URL/v1` with `Authorization: Bearer $TOKEN` — but note Pinggy strips/replaces nothing in the body, so the model server itself sees Pinggy's token; the local server should be configured to ignore auth (it's already on `127.0.0.1`) and let Pinggy do the gating.
+
+### Recipe 4 — Share a dev server with a one-shot password
+
+The fastest "let a teammate poke at my running app" pattern. Random password, prints once, dies when you Ctrl-C.
+
+```bash
+PASS=$(openssl rand -base64 12 | tr -d '+/=' | head -c 12)
+echo "Dev server password: $PASS"
+ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+    -o ServerAliveInterval=30 \
+    -R0:localhost:3000 "b:dev:$PASS+co+x:https+free@a.pinggy.io"
+# URL prints to the terminal. Share URL + password. Ctrl-C to tear down.
+```
+
+`b:dev:$PASS` gates the URL with HTTP Basic auth. `x:https` forces TLS. `co` adds CORS for SPA frontends.
+
+## Verification
+
+```bash
+# End-to-end: spin up a trivial origin, tunnel it, hit it, tear down
+python3 -m http.server 18000 --bind 127.0.0.1 >/tmp/origin.log 2>&1 &
+ORIGIN_PID=$!
+
+nohup ssh -p 443 \
+    -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null \
+    -R0:localhost:18000 free@a.pinggy.io >/tmp/pinggy-verify.log 2>&1 &
+SSH_PID=$!
+
+sleep 5
+URL=$(grep -oE 'https://[a-z0-9-]+\.[a-z]+\.pinggy\.link' /tmp/pinggy-verify.log | head -1)
+echo "URL: $URL"
+curl -sI "$URL/" | head -1
+
+kill "$SSH_PID" "$ORIGIN_PID"
+```
+
+Expected: a `pinggy.link` URL and `HTTP/2 200` on the curl head.
diff --git a/website/sidebars.ts b/website/sidebars.ts
index e8bc7ccb13e..2f870a97696 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -423,6 +423,7 @@ const sidebars: SidebarsConfig = {
                   items: [
                     'user-guide/skills/optional/devops/devops-cli',
                     'user-guide/skills/optional/devops/devops-docker-management',
+                    'user-guide/skills/optional/devops/devops-pinggy-tunnel',
                     'user-guide/skills/optional/devops/devops-watchers',
                   ],
                 },

From 70b663504fee1d58a6763e862df478cf101fe51e Mon Sep 17 00:00:00 2001
From: brooklyn! <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 16 May 2026 00:28:12 -0500
Subject: [PATCH 114/218] fix(tui): keep Ink displayCursor in sync with
 fast-echo writes so cursor stops drifting (#26717)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(tui): keep Ink displayCursor in sync with fast-echo writes so cursor stops drifting

TextInput's fast-echo bypass writes characters directly to stdout to
avoid waiting on a React re-render for each keystroke. The hardware
cursor advances by text.length cells, but Ink's cached `displayCursor`
(the basis for the next frame's relative cursor-move preamble in
log-update) stayed unchanged. When ANY unrelated component re-rendered
between the fast-echo write and the deferred composer setCur/setParent
flush — status bar timer, streaming reasoning, etc. — the next frame's
preamble emitted a relative cursor move from a stale parked position
and the hardware cursor parked N cells offset from the actual caret.

Visible symptom: extra whitespace between the just-typed character and
the cursor block, intermittent, worse on long sessions during streaming.
Alt-screen was immune because frames begin with absolute CSI H.

This adds a small API in @hermes/ink:

  - `Ink.noteExternalCursorAdvance(dx, dy?)` — bumps displayCursor if
    set, otherwise seeds from frontFrame.cursor so the next preamble's
    relative move correctly cancels the external advance. No-op on
    alt-screen.
  - `CursorAdvanceContext` + `useCursorAdvance()` hook to expose it.

TextInput then calls `noteCursorAdvance(text.length)` after the
fast-echo `stdout.write(text)` append, and `noteCursorAdvance(-1)`
after the fast-backspace `\b \b` sequence.

Tests: 4 new vitest cases pin the API contract (bumps when set, seeds
from frontFrame.cursor when null, alt-screen no-op, zero-delta no-op).
All 751 ui-tui tests pass; tests/test_tui_gateway_server.py (177) pass.

* fix(tui): also advance cursorDeclaration so fast-echo survives deferred React state

Copilot review on PR #26717 flagged a gap in the original fix:
TextInput's fast-echo path defers the React `cur` state update by
16ms (perf optimization that batches re-renders during heavy typing).
Inside that window, `useDeclaredCursor` still publishes a target
computed from the PRE-keystroke `cur` — `cursorLayout(display, cur,
columns)`. Advancing only `displayCursor` would let any unrelated
re-render in that 16ms window run onRender's cursor-park branch with
the stale declaration and visually undo the fast-echo's advance.

The fix is symmetric: `noteExternalCursorAdvance` now bumps BOTH
`displayCursor` (the log-update relative-move basis) AND, if non-null,
`cursorDeclaration.relativeX/Y` (the target the cursor parks at after
every frame). When React finally flushes `setCur`, `useDeclaredCursor`
publishes a fresh declaration that supersedes our bumped one — exactly
what we want.

Adds two new vitest cases covering both halves:
  - active declaration advances in lock-step with displayCursor
  - null declaration stays null (no spurious bump)

All 753 ui-tui tests pass; tests/test_tui_gateway_server.py (177) pass.

Closes review threads:
  PRRT_kwDOPRF1G86ChKtD (textInput.tsx:1016 fast-echo append)
  PRRT_kwDOPRF1G86ChKtF (textInput.tsx:924 fast-backspace)
  PRRT_kwDOPRF1G86ChKtG (ink-cursor-advance.test.ts:57 missing coverage)

* fix(tui): make fast-echo survive TextInput rerenders + alt-screen (Copilot round 2)

Round 2 of PR #26717 review. Three real holes Copilot flagged after the
initial cursorDeclaration bump:

1. alt-screen early-return skipped BOTH halves of the notifier. But the
   default TUI wraps the composer in <AlternateScreen> — that IS the
   production path. CSI H resets log-update's relative-move basis, but
   the alt-screen park branch uses absolute CUP =
   `rect.x + decl.relativeX`, so a stale declaration there still parks
   the cursor at the pre-keystroke caret. Fix: skip ONLY the
   displayCursor half on alt-screen; still bump cursorDeclaration.

2. TextInput's own rerender could clobber the Ink-level bump. The fast-
   echo path defers setCur by 16ms; if a parent state change rerenders
   TextInput in that window, the layout effect inside useDeclaredCursor
   reads the stale React `cur` state and re-publishes a declaration at
   the OLD column. Fix:
   `cursorLayout(display, curRef.current, columns)` — read the always-
   up-to-date ref, not the deferred state. useMemo dropped (compute is
   cheap, single-line wrap-text in the common case).

3. Tests bypassed the production wiring. Added two structural tests:
   - `still advances cursorDeclaration on alt-screen` in the Ink-level
     suite, asserting displayCursor stays put but the declaration
     advances by the delta.
   - `textInputCursorSourceOfTruth.test.ts` pins three structural
     invariants: layout reads curRef.current, never the bare `cur`
     state, and the fast-echo stdout.write calls remain paired with
     noteCursorAdvance(±N). Source-grep invariants > flaky Ink mount
     tests for this kind of regression.

757/757 ui-tui tests pass (+3 over round 1). type-check clean. lint
introduces zero new errors on touched files. tests/test_tui_gateway_server.py
(177) pass.

Closes review threads:
  PRRT_kwDOPRF1G86ChOG2 (ink.tsx alt-screen guard)
  PRRT_kwDOPRF1G86ChOG9 (textInput.tsx fast-backspace rerender window)
  PRRT_kwDOPRF1G86ChOHC (textInput.tsx fast-append rerender window)
  PRRT_kwDOPRF1G86ChOHJ (alt-screen test asserts wrong invariant)
  PRRT_kwDOPRF1G86ChOHP (missing integration-style coverage)

* fix(tui): reject fast-backspace at soft-wrap boundary (Copilot round 3)

PR #26717 round 3. Copilot caught two real things:

1. `\b \b` cannot move the terminal cursor onto the previous visual
   row across a soft-wrap boundary. When the caret sits at visual
   column 0 of a wrapped row (e.g. value 'hello ' at width 6 →
   cursorLayout produces (line 1, col 0)), backspace would leave the
   physical cursor in place while the logical caret moves up to the
   end of the previous visual line. `noteCursorAdvance(-1)` would then
   feed Ink a wrong delta. Fix: `canFastBackspaceShape` now takes the
   composer width and rejects when `cursorLayout(value, cursor, columns).column === 0`.
   The fast path falls through to the normal Ink render, which
   correctly lays out the new caret position. The PR-description
   inconsistency about alt-screen is fixed in a separate gh pr edit.

Adds 4 new tests in textInputFastEcho.test.ts pinning the rejection at
exact-multiple wrap boundaries plus a positive control inside a
wrapped line and a back-compat case where `columns` is omitted.

761/761 ui-tui tests pass. type-check / lint clean. 177/177 Python
tests/test_tui_gateway_server.py pass.

Closes review threads:
  PRRT_kwDOPRF1G86ChxE5 (textInput.tsx:933 wrap-boundary regression)

* fix(tui): polish doc + tests after Copilot round 4

Three polish points Copilot raised:

1. canFastBackspaceShape doc comment overstated the legacy contract —
   said it conservatively rejects potential wrap boundaries when
   columns is omitted, but the implementation actually skips the
   wrap-boundary check entirely. Reworded to make the legacy behavior
   explicit and warn callers not to rely on protection they don't get.

2. ink-cursor-advance.test.ts rationale comment for the
   'advances cursorDeclaration in lock-step' case still referenced
   the pre-fix `cursorLayout(display, cur, columns)` expression. Now
   accurately describes the current source of truth — `curRef.current`
   in textInput.tsx — and explains the window the bump is bridging.

3. Removed the three `__get*ForTest` accessors from Ink. The test
   file already cast the instance to inspect private state in the
   couple of tests that needed declaration mutation; the rest now use
   a small `peek(ink)` helper that does the same cast for reads. No
   test-only API surface ships in production.

761/761 ui-tui tests pass. type-check clean. lint introduces zero new
errors on touched files. 177/177 tests/test_tui_gateway_server.py pass.

Closes review threads:
  PRRT_kwDOPRF1G86Ch23W (canFastBackspaceShape doc accuracy)
  PRRT_kwDOPRF1G86Ch23f (stale test rationale)
  PRRT_kwDOPRF1G86Ch23p (test-only API surface in production)

* fix(tui): tighten doc + add dy test coverage (Copilot round 5)

Two polish points from round 5:

1. canFastBackspaceShape doc had two paragraphs that conflicted —
   the main 'Additionally rejects when the physical cursor sits at
   visual column 0' was stated unconditionally, then the columns-param
   paragraph qualified that it only happens when columns is passed.
   Reworked into clear 'When supplied / When omitted' branches with a
   concrete example value ('hello ' returns true without columns even
   though it would be unsafe at width 6). No more inconsistency.

2. Added a test asserting cursorDeclaration.relativeY advances when dy
   is non-zero. Existing tests exercised dy on displayCursor only.
   Newlines in fast-echoed text don't currently hit the bypass
   (canFastAppendShape rejects '\n'), but dy is part of the public
   notifier contract and must propagate symmetrically with dx so
   future callers get a fully-implemented contract.

762/762 ui-tui tests pass (+1). type-check / lint / build clean.

Closes review threads:
  PRRT_kwDOPRF1G86Ch6Sz (doc inconsistency)
  PRRT_kwDOPRF1G86Ch6TE (missing dy coverage on declaration)

* fix(tui): doc polish (Copilot round 6)

Four small but valid points:

1. textInputCursorSourceOfTruth.test.ts used bare 'fs'/'path'/'url'
   imports; the rest of ui-tui consistently uses the 'node:' prefix
   (see src/__tests__/useSessionLifecycle.test.ts, src/lib/editor.test.ts).
   Switched to node:fs / node:path / node:url to match convention.

2. CursorAdvanceContext.ts type-level doc described only displayCursor.
   The notifier intentionally also mutates the active cursorDeclaration
   and that's the only part that matters on alt-screen. Reworked the
   doc into a two-part 'updates both' summary with the alt-screen
   asymmetry called out explicitly.

3. use-cursor-advance.ts hook doc had the same problem. Same fix —
   document both pieces of state, both screen modes.

4. App.tsx onCursorAdvance prop comment was incomplete. Same fix —
   describe both state updates and the screen-mode asymmetry.

No behavior change. 762/762 ui-tui tests pass. type-check / lint /
build clean.

Closes review threads (auto-resolved on PR but valid critiques):
  PRRT_kwDOPRF1G86Ch926 (node: prefix on built-in imports)
  PRRT_kwDOPRF1G86Ch92_ (use-cursor-advance.ts doc)
  PRRT_kwDOPRF1G86Ch93H (CursorAdvanceContext.ts type doc)
  PRRT_kwDOPRF1G86Ch93J (App.tsx prop comment)
---
 ui-tui/packages/hermes-ink/index.d.ts         |   1 +
 .../packages/hermes-ink/src/entry-exports.ts  |   1 +
 .../hermes-ink/src/ink/components/App.tsx     |  17 +-
 .../ink/components/CursorAdvanceContext.ts    |  35 +++
 .../src/ink/hooks/use-cursor-advance.ts       |  33 +++
 .../src/ink/ink-cursor-advance.test.ts        | 234 ++++++++++++++++++
 ui-tui/packages/hermes-ink/src/ink/ink.tsx    |  81 ++++++
 .../textInputCursorSourceOfTruth.test.ts      |  50 ++++
 .../src/__tests__/textInputFastEcho.test.ts   |  38 +++
 ui-tui/src/components/textInput.tsx           |  61 ++++-
 ui-tui/src/types/hermes-ink.d.ts              |   1 +
 11 files changed, 547 insertions(+), 5 deletions(-)
 create mode 100644 ui-tui/packages/hermes-ink/src/ink/components/CursorAdvanceContext.ts
 create mode 100644 ui-tui/packages/hermes-ink/src/ink/hooks/use-cursor-advance.ts
 create mode 100644 ui-tui/packages/hermes-ink/src/ink/ink-cursor-advance.test.ts
 create mode 100644 ui-tui/src/__tests__/textInputCursorSourceOfTruth.test.ts

diff --git a/ui-tui/packages/hermes-ink/index.d.ts b/ui-tui/packages/hermes-ink/index.d.ts
index 637c4bb43b6..5d5ae9387c0 100644
--- a/ui-tui/packages/hermes-ink/index.d.ts
+++ b/ui-tui/packages/hermes-ink/index.d.ts
@@ -21,6 +21,7 @@ export { default as Text } from './src/ink/components/Text.tsx'
 export type { Props as TextProps } from './src/ink/components/Text.tsx'
 export type { Key } from './src/ink/events/input-event.ts'
 export { default as useApp } from './src/ink/hooks/use-app.ts'
+export { useCursorAdvance } from './src/ink/hooks/use-cursor-advance.ts'
 export { useDeclaredCursor } from './src/ink/hooks/use-declared-cursor.ts'
 export { default as useInput } from './src/ink/hooks/use-input.ts'
 export { useHasSelection, useSelection } from './src/ink/hooks/use-selection.ts'
diff --git a/ui-tui/packages/hermes-ink/src/entry-exports.ts b/ui-tui/packages/hermes-ink/src/entry-exports.ts
index 355faa16f97..d173e0c9bb1 100644
--- a/ui-tui/packages/hermes-ink/src/entry-exports.ts
+++ b/ui-tui/packages/hermes-ink/src/entry-exports.ts
@@ -12,6 +12,7 @@ export { default as ScrollBox } from './ink/components/ScrollBox.js'
 export { default as Spacer } from './ink/components/Spacer.js'
 export { default as Text } from './ink/components/Text.js'
 export { default as useApp } from './ink/hooks/use-app.js'
+export { useCursorAdvance } from './ink/hooks/use-cursor-advance.js'
 export { useDeclaredCursor } from './ink/hooks/use-declared-cursor.js'
 export { type RunExternalProcess, useExternalProcess, withInkSuspended } from './ink/hooks/use-external-process.js'
 export { default as useInput } from './ink/hooks/use-input.js'
diff --git a/ui-tui/packages/hermes-ink/src/ink/components/App.tsx b/ui-tui/packages/hermes-ink/src/ink/components/App.tsx
index 5851c4bef66..54892e3b7b1 100644
--- a/ui-tui/packages/hermes-ink/src/ink/components/App.tsx
+++ b/ui-tui/packages/hermes-ink/src/ink/components/App.tsx
@@ -33,6 +33,7 @@ import { DBP, DFE, DISABLE_MOUSE_TRACKING, EBP, EFE, SHOW_CURSOR } from '../term
 
 import AppContext from './AppContext.js'
 import { ClockProvider } from './ClockContext.js'
+import CursorAdvanceContext, { type CursorAdvanceNotifier } from './CursorAdvanceContext.js'
 import CursorDeclarationContext, { type CursorDeclarationSetter } from './CursorDeclarationContext.js'
 import ErrorOverview from './ErrorOverview.js'
 import StdinContext from './StdinContext.js'
@@ -100,6 +101,18 @@ type Props = {
   // Enables IME composition at the input caret and lets screen readers /
   // magnifiers track the input. Optional so testing.tsx doesn't stub it.
   readonly onCursorDeclaration?: CursorDeclarationSetter
+  // Receives notifications that the physical cursor was advanced out-of-band
+  // (e.g. TextInput's fast-echo bypass writing directly to stdout). The
+  // handler in ink.tsx updates two pieces of state from a single call:
+  //   - `displayCursor` (the relative-move basis log-update uses on the
+  //     next frame; skipped on alt-screen where CSI H resets it every
+  //     frame anyway), and
+  //   - the active `cursorDeclaration.relativeX/Y` (the target the cursor
+  //     parks at after every frame; bumped on BOTH screens because
+  //     onRender's alt-screen branch emits an absolute CUP from it and
+  //     a stale declaration there is still visibly wrong).
+  // Optional so testing.tsx doesn't need to stub it.
+  readonly onCursorAdvance?: CursorAdvanceNotifier
   // Dispatch a keyboard event through the DOM tree. Called for each
   // parsed key alongside the legacy EventEmitter path.
   readonly dispatchKeyboardEvent: (parsedKey: ParsedKey) => void
@@ -196,7 +209,9 @@ export default class App extends PureComponent<Props, State> {
             <TerminalFocusProvider>
               <ClockProvider>
                 <CursorDeclarationContext.Provider value={this.props.onCursorDeclaration ?? (() => {})}>
-                  {this.state.error ? <ErrorOverview error={this.state.error as Error} /> : this.props.children}
+                  <CursorAdvanceContext.Provider value={this.props.onCursorAdvance ?? (() => {})}>
+                    {this.state.error ? <ErrorOverview error={this.state.error as Error} /> : this.props.children}
+                  </CursorAdvanceContext.Provider>
                 </CursorDeclarationContext.Provider>
               </ClockProvider>
             </TerminalFocusProvider>
diff --git a/ui-tui/packages/hermes-ink/src/ink/components/CursorAdvanceContext.ts b/ui-tui/packages/hermes-ink/src/ink/components/CursorAdvanceContext.ts
new file mode 100644
index 00000000000..52566c1a917
--- /dev/null
+++ b/ui-tui/packages/hermes-ink/src/ink/components/CursorAdvanceContext.ts
@@ -0,0 +1,35 @@
+import { createContext } from 'react'
+
+/**
+ * Notify Ink that the physical terminal cursor was advanced by an
+ * out-of-band stdout.write (e.g. the TextInput fast-echo path).
+ *
+ * This is a two-part notification — calling it updates both:
+ *
+ *   1. Ink's cached `displayCursor` (the basis log-update uses to
+ *      compute relative cursor moves for the next frame's preamble).
+ *      Without this, the next frame's preamble starts from a stale
+ *      parked position and the diff is rendered N cells offset.
+ *      This half is SKIPPED on alt-screen — every alt-screen frame
+ *      begins with CSI H which absolutely repositions the cursor, so
+ *      the relative-move basis is reset for free.
+ *
+ *   2. Ink's active `cursorDeclaration` (the target the cursor parks
+ *      at after every frame, set by `useDeclaredCursor`). Without
+ *      this, an unrelated component re-rendering before the deferred
+ *      React state catches up would publish a stale declaration and
+ *      visually undo the fast-echo's advance. This half applies to
+ *      BOTH main-screen and alt-screen — on alt-screen the cursor-
+ *      park branch in onRender emits an absolute CUP to
+ *      `rect.x + decl.relativeX`, so a stale declaration there is
+ *      still wrong even though displayCursor is skipped.
+ *
+ * `dx`/`dy` are deltas in terminal cells (positive = right/down,
+ * negative = left/up). The caller is responsible for ensuring the
+ * physical cursor really did move by that amount.
+ */
+export type CursorAdvanceNotifier = (dx: number, dy?: number) => void
+
+const CursorAdvanceContext = createContext<CursorAdvanceNotifier>(() => {})
+
+export default CursorAdvanceContext
diff --git a/ui-tui/packages/hermes-ink/src/ink/hooks/use-cursor-advance.ts b/ui-tui/packages/hermes-ink/src/ink/hooks/use-cursor-advance.ts
new file mode 100644
index 00000000000..15831ed86ab
--- /dev/null
+++ b/ui-tui/packages/hermes-ink/src/ink/hooks/use-cursor-advance.ts
@@ -0,0 +1,33 @@
+import { useContext } from 'react'
+
+import CursorAdvanceContext, { type CursorAdvanceNotifier } from '../components/CursorAdvanceContext.js'
+
+/**
+ * Returns a function that notifies Ink the physical terminal cursor was
+ * advanced out-of-band (e.g. by a direct stdout.write from the
+ * TextInput fast-echo bypass).
+ *
+ * Calling the returned function updates two pieces of Ink state:
+ *
+ *   - `displayCursor` — the cached parked-cursor position log-update
+ *     uses as the relative-move basis for the next frame. Skipped on
+ *     alt-screen, where every frame's CSI H resets the cursor anyway.
+ *
+ *   - The active `cursorDeclaration` — the target the cursor parks at
+ *     after every frame. Bumped on BOTH main- and alt-screen, because
+ *     onRender's alt-screen park branch emits an absolute CUP from
+ *     this value and a stale declaration there is still visibly wrong.
+ *     The next React commit that publishes a fresh declaration
+ *     supersedes the bump.
+ *
+ * The caller is responsible for the stdout write itself; this hook
+ * only reports the resulting cursor delta. Pass `dx` and optional
+ * `dy` in terminal cells (positive = moved right/down, negative =
+ * moved left/up).
+ *
+ * If the host isn't an Ink render root (test stubs, non-Ink renderer)
+ * the returned callback is a safe no-op.
+ */
+export function useCursorAdvance(): CursorAdvanceNotifier {
+  return useContext(CursorAdvanceContext)
+}
diff --git a/ui-tui/packages/hermes-ink/src/ink/ink-cursor-advance.test.ts b/ui-tui/packages/hermes-ink/src/ink/ink-cursor-advance.test.ts
new file mode 100644
index 00000000000..a3cc1757ab6
--- /dev/null
+++ b/ui-tui/packages/hermes-ink/src/ink/ink-cursor-advance.test.ts
@@ -0,0 +1,234 @@
+import { EventEmitter } from 'events'
+
+import React from 'react'
+import { describe, expect, it } from 'vitest'
+
+import Text from './components/Text.js'
+import Ink from './ink.js'
+
+class FakeTty extends EventEmitter {
+  chunks: string[] = []
+  columns = 40
+  rows = 8
+  isTTY = true
+
+  write(chunk: string | Uint8Array, cb?: (err?: Error | null) => void): boolean {
+    this.chunks.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf8'))
+    cb?.()
+
+    return true
+  }
+}
+
+function makeInk() {
+  const stdout = new FakeTty()
+  const stdin = new FakeTty()
+  const stderr = new FakeTty()
+
+  const ink = new Ink({
+    exitOnCtrlC: false,
+    patchConsole: false,
+    stderr: stderr as unknown as NodeJS.WriteStream,
+    stdin: stdin as unknown as NodeJS.ReadStream,
+    stdout: stdout as unknown as NodeJS.WriteStream
+  })
+
+  return { ink, stdout, stdin, stderr }
+}
+
+// Cast helper instead of exposing __get*ForTest methods on production Ink —
+// these are internal frame/cursor caches we only inspect from tests.
+type InkPrivate = {
+  displayCursor: { x: number; y: number } | null
+  cursorDeclaration: { node: unknown; relativeX: number; relativeY: number } | null
+  frontFrame: { cursor: { x: number; y: number } }
+}
+const peek = (ink: Ink): InkPrivate => ink as unknown as InkPrivate
+
+// Closes the cursor-drift bug: when TextInput's fast-echo path writes a
+// printable character directly to stdout, the hardware cursor advances by
+// one cell BUT Ink's `displayCursor` cache (used as the basis for the
+// next frame's relative cursor preamble) wasn't being updated. On long
+// sessions an unrelated re-render (status bar timer, streaming
+// reasoning, etc.) would then park the hardware cursor N cells offset
+// from the actual caret — visible as "extra whitespace between my last
+// typed character and the cursor block".
+describe('Ink.noteExternalCursorAdvance', () => {
+  it('bumps an already-tracked displayCursor by the given delta', () => {
+    const { ink } = makeInk()
+
+    ink.render(React.createElement(Text, null, 'hi'))
+    ink.onRender()
+
+    // Seed a known parked position directly. In production this is set by
+    // the cursor-park branch in onRender when a useDeclaredCursor caller
+    // commits a declaration; this test bypasses React for hermeticity.
+    peek(ink).displayCursor = { x: 5, y: 0 }
+
+    ink.noteExternalCursorAdvance(3)
+    expect(peek(ink).displayCursor).toEqual({ x: 8, y: 0 })
+
+    ink.noteExternalCursorAdvance(-1)
+    expect(peek(ink).displayCursor).toEqual({ x: 7, y: 0 })
+
+    ink.noteExternalCursorAdvance(0, 2)
+    expect(peek(ink).displayCursor).toEqual({ x: 7, y: 2 })
+
+    ink.unmount()
+  })
+
+  it('seeds displayCursor from frontFrame.cursor when nothing was parked', () => {
+    const { ink } = makeInk()
+
+    ink.render(React.createElement(Text, null, 'hello'))
+    ink.onRender()
+
+    expect(peek(ink).displayCursor).toBeNull()
+    const base = { x: peek(ink).frontFrame.cursor.x, y: peek(ink).frontFrame.cursor.y }
+
+    ink.noteExternalCursorAdvance(4)
+    expect(peek(ink).displayCursor).toEqual({ x: base.x + 4, y: base.y })
+
+    ink.unmount()
+  })
+
+  it('is a no-op when the delta is zero', () => {
+    const { ink } = makeInk()
+
+    ink.render(React.createElement(Text, null, 'hi'))
+    ink.onRender()
+
+    ink.noteExternalCursorAdvance(0)
+    expect(peek(ink).displayCursor).toBeNull()
+
+    ink.noteExternalCursorAdvance(0, 0)
+    expect(peek(ink).displayCursor).toBeNull()
+
+    ink.unmount()
+  })
+
+  it('skips displayCursor on alt-screen — CSI H resets every frame', () => {
+    const { ink } = makeInk()
+
+    ink.setAltScreenActive(true)
+    ink.render(React.createElement(Text, null, 'hi'))
+    ink.onRender()
+    peek(ink).displayCursor = { x: 5, y: 0 }
+
+    ink.noteExternalCursorAdvance(3)
+
+    expect(peek(ink).displayCursor).toEqual({ x: 5, y: 0 })
+
+    ink.unmount()
+  })
+
+  // Closes Copilot follow-up on PR #26717: the default TUI wraps the
+  // composer in <AlternateScreen>, so alt-screen is the production
+  // path. CSI H only resets the log-update relative-move basis — the
+  // declared cursor target is still consulted by onRender's alt-screen
+  // park branch (`cursorPosition(row, col)` using rect + decl). So
+  // cursorDeclaration MUST advance on alt-screen too, even though
+  // displayCursor doesn't need to.
+  it('still advances cursorDeclaration on alt-screen', () => {
+    const { ink } = makeInk()
+
+    ink.setAltScreenActive(true)
+    ink.render(React.createElement(Text, null, 'hi'))
+    ink.onRender()
+
+    const fakeNode = {} as unknown as Record<string, unknown>
+
+    peek(ink).cursorDeclaration = { node: fakeNode, relativeX: 7, relativeY: 0 }
+    peek(ink).displayCursor = { x: 12, y: 0 }
+
+    ink.noteExternalCursorAdvance(3)
+
+    // displayCursor untouched on alt-screen
+    expect(peek(ink).displayCursor).toEqual({ x: 12, y: 0 })
+    // declaration still advanced — onRender's alt-screen park reads this
+    expect(peek(ink).cursorDeclaration).toEqual({ node: fakeNode, relativeX: 10, relativeY: 0 })
+
+    ink.unmount()
+  })
+
+  // Closes Copilot review feedback on PR #26717: even after the
+  // TextInput-level fix where layout reads `curRef.current` directly,
+  // there's still a window where a fast-echo wrote to stdout but the
+  // current cursor declaration on Ink (set by an earlier render's
+  // useDeclaredCursor commit) points at the PRE-keystroke caret
+  // column. If we advanced only `displayCursor`, an unrelated re-render
+  // in that window would re-run onRender's cursor-park branch with the
+  // stale declaration and visually undo the fast-echo's advance. We
+  // must bump BOTH so the cursor stays anchored to the physical caret
+  // until the next React commit publishes a fresh declaration
+  // (computed from `curRef.current` via the cursorLayout call in
+  // textInput.tsx) that supersedes the bump.
+  it('advances the active cursorDeclaration in lock-step with displayCursor', () => {
+    const { ink } = makeInk()
+
+    ink.render(React.createElement(Text, null, 'hi'))
+    ink.onRender()
+
+    const fakeNode = {} as unknown as Record<string, unknown>
+
+    peek(ink).cursorDeclaration = { node: fakeNode, relativeX: 7, relativeY: 0 }
+    peek(ink).displayCursor = { x: 12, y: 0 }
+
+    ink.noteExternalCursorAdvance(3)
+
+    expect(peek(ink).displayCursor).toEqual({ x: 15, y: 0 })
+    expect(peek(ink).cursorDeclaration).toEqual({ node: fakeNode, relativeX: 10, relativeY: 0 })
+
+    ink.noteExternalCursorAdvance(-1)
+    expect(peek(ink).displayCursor).toEqual({ x: 14, y: 0 })
+    expect(peek(ink).cursorDeclaration).toEqual({ node: fakeNode, relativeX: 9, relativeY: 0 })
+
+    ink.unmount()
+  })
+
+  // Closes Copilot follow-up on PR #26717: the dy half of the notifier
+  // contract was tested for `displayCursor` but not for
+  // `cursorDeclaration.relativeY`. Newlines in fast-echoed text never
+  // hit the bypass today (canFastAppendShape rejects '\n'), but `dy`
+  // is part of the public API and must propagate symmetrically with
+  // dx so future callers (e.g. multi-line paste shortcuts) don't get
+  // a half-implemented contract.
+  it('advances cursorDeclaration.relativeY when dy is non-zero', () => {
+    const { ink } = makeInk()
+
+    ink.render(React.createElement(Text, null, 'hi'))
+    ink.onRender()
+
+    const fakeNode = {} as unknown as Record<string, unknown>
+
+    peek(ink).cursorDeclaration = { node: fakeNode, relativeX: 2, relativeY: 1 }
+    peek(ink).displayCursor = { x: 4, y: 2 }
+
+    ink.noteExternalCursorAdvance(1, 3)
+
+    expect(peek(ink).displayCursor).toEqual({ x: 5, y: 5 })
+    expect(peek(ink).cursorDeclaration).toEqual({ node: fakeNode, relativeX: 3, relativeY: 4 })
+
+    // Negative dy too — cursor moving up across visual rows.
+    ink.noteExternalCursorAdvance(0, -2)
+    expect(peek(ink).displayCursor).toEqual({ x: 5, y: 3 })
+    expect(peek(ink).cursorDeclaration).toEqual({ node: fakeNode, relativeX: 3, relativeY: 2 })
+
+    ink.unmount()
+  })
+
+  it('leaves cursorDeclaration unchanged when no declaration is active', () => {
+    const { ink } = makeInk()
+
+    ink.render(React.createElement(Text, null, 'hi'))
+    ink.onRender()
+
+    expect(peek(ink).cursorDeclaration).toBeNull()
+
+    ink.noteExternalCursorAdvance(3)
+
+    expect(peek(ink).cursorDeclaration).toBeNull()
+
+    ink.unmount()
+  })
+})
diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx
index 8cdfe781395..49fdf704488 100644
--- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx
+++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx
@@ -16,6 +16,7 @@ import { logError } from '../utils/log.js'
 
 import { colorize } from './colorize.js'
 import App from './components/App.js'
+import type { CursorAdvanceNotifier } from './components/CursorAdvanceContext.js'
 import type { CursorDeclaration, CursorDeclarationSetter } from './components/CursorDeclarationContext.js'
 import { FRAME_INTERVAL_MS } from './constants.js'
 import * as dom from './dom.js'
@@ -2219,6 +2220,85 @@ export default class Ink {
 
     this.cursorDeclaration = decl
   }
+  // Caller writes raw bytes to stdout that move the physical terminal
+  // cursor (e.g. TextInput's fast-echo bypass). Without this notification,
+  // Ink's `displayCursor` cache and log-update's prevFrame.cursor stay
+  // unchanged, so the next frame's relative cursor moves compute from a
+  // stale position and the hardware cursor parks `dx` cells offset from
+  // the actual caret. Visible symptom: extra whitespace between the just-
+  // typed character and the cursor block, more pronounced on long
+  // sessions where unrelated components re-render between fast-echo and
+  // the deferred composer re-render.
+  //
+  // If displayCursor was already tracked, just bump it. Otherwise seed it
+  // to (prevFrame.cursor + delta) so the next frame's preamble emits a
+  // (-dx, -dy) relative move that brings the cursor back to log-update's
+  // expected start position before the diff body runs.
+  //
+  // Public so tests can drive it directly without mounting App.
+  //
+  // Bumps BOTH `displayCursor` (used by log-update's relative-move
+  // preamble) AND, if non-null, `cursorDeclaration.relativeX/Y` (the
+  // target the cursor parks at after every frame). Advancing only one
+  // of the two would leave the other stale: e.g. if the deferred React
+  // `setCur` hasn't flushed yet, the next unrelated re-render would
+  // re-compute `target` from the stale declaration and park the
+  // hardware cursor back at the old caret column. We advance both so
+  // the fast-echo is invisible to intervening frames until React
+  // catches up.
+  noteExternalCursorAdvance: CursorAdvanceNotifier = (dx, dy = 0) => {
+    if (dx === 0 && dy === 0) {
+      return
+    }
+
+    // displayCursor / log-update relative-move basis only matters on
+    // main screen — alt-screen frames begin with absolute CSI H every
+    // frame so the next preamble naturally resets to (0,0). cursorDeclaration,
+    // however, IS still consulted on alt-screen — onRender's park branch
+    // emits an absolute CUP using `rect.x + decl.relativeX`, so a stale
+    // declaration in the deferred-setCur window would park the cursor
+    // at the pre-keystroke caret. We therefore skip ONLY the displayCursor
+    // half on alt-screen, not the declaration half.
+    if (!this.altScreenActive) {
+      if (this.displayCursor !== null) {
+        this.displayCursor = {
+          x: this.displayCursor.x + dx,
+          y: this.displayCursor.y + dy
+        }
+      } else {
+        // No prior parked position. Seed from frontFrame.cursor (where
+        // log-update parked the cursor at the end of the last frame) so
+        // the next preamble's relative move correctly cancels the
+        // external advance.
+        const baseX = this.frontFrame.cursor.x
+        const baseY = this.frontFrame.cursor.y
+        this.displayCursor = { x: baseX + dx, y: baseY + dy }
+      }
+    }
+
+    // Also advance the active cursor declaration if any. Without this,
+    // a TextInput that defers its React `cur` state update (16ms timer
+    // in textInput.tsx — perf optimization that batches re-renders
+    // during heavy typing) leaves `cursorDeclaration.relativeX` pointing
+    // at the pre-keystroke caret column. If an unrelated component
+    // re-renders before the deferred `setCur` flushes, the cursor-park
+    // branch at the end of onRender would move the hardware cursor back
+    // to that stale relativeX and visually undo the fast-echo's
+    // advance. Bumping relativeX here keeps the declared target in
+    // lock-step with the physical cursor until React state catches up.
+    // Applies to BOTH main-screen and alt-screen — the alt-screen park
+    // branch uses an absolute CUP to (rect.x + decl.relativeX), so a
+    // stale declaration there would still produce the wrong column.
+    const decl = this.cursorDeclaration
+
+    if (decl !== null) {
+      this.cursorDeclaration = {
+        node: decl.node,
+        relativeX: decl.relativeX + dx,
+        relativeY: decl.relativeY + dy
+      }
+    }
+  }
   render(node: ReactNode): void {
     this.currentNode = node
 
@@ -2228,6 +2308,7 @@ export default class Ink {
         exitOnCtrlC={this.options.exitOnCtrlC}
         getHyperlinkAt={this.getHyperlinkAt}
         onClickAt={this.dispatchClick}
+        onCursorAdvance={this.noteExternalCursorAdvance}
         onCursorDeclaration={this.setCursorDeclaration}
         onExit={this.unmount}
         onHoverAt={this.dispatchHover}
diff --git a/ui-tui/src/__tests__/textInputCursorSourceOfTruth.test.ts b/ui-tui/src/__tests__/textInputCursorSourceOfTruth.test.ts
new file mode 100644
index 00000000000..b52894d1587
--- /dev/null
+++ b/ui-tui/src/__tests__/textInputCursorSourceOfTruth.test.ts
@@ -0,0 +1,50 @@
+import { readFileSync } from 'node:fs'
+import { dirname, join } from 'node:path'
+import { fileURLToPath } from 'node:url'
+
+import { describe, expect, it } from 'vitest'
+
+// Locate textInput.tsx relative to this test file so the assertion
+// survives moves of the test fixture itself.
+const TEXT_INPUT_PATH = join(dirname(fileURLToPath(import.meta.url)), '..', 'components', 'textInput.tsx')
+const source = readFileSync(TEXT_INPUT_PATH, 'utf8')
+
+// Closes Copilot follow-up on PR #26717: the original cursor-drift
+// fix bumped Ink's displayCursor / cursorDeclaration on fast-echo, but
+// if TextInput itself re-renders before the deferred 16ms `setCur`
+// flushes (parent state change, status-bar tick, spinner) the layout
+// effect inside `useDeclaredCursor` re-publishes a declaration
+// computed from the STALE React `cur` state and clobbers the Ink-level
+// bump. The fix is structural: read `curRef.current` (always
+// up-to-date) when computing the layout, not the `cur` state.
+//
+// This file pins that invariant. Switching back to `cur` state — or
+// re-introducing a memo keyed on `cur` that uses `curRef.current`
+// inside but stops re-computing on rerender — is a regression and
+// should be caught here, not via a flaky integration test that mounts
+// Ink + stdin.
+describe('textInput cursor-layout source of truth', () => {
+  it('reads curRef.current (not the cur React state) for cursorLayout', () => {
+    // The line we care about. We allow whitespace / formatting drift,
+    // but the call itself must use `curRef.current`.
+    expect(source).toMatch(/cursorLayout\(\s*display\s*,\s*curRef\.current\s*,\s*columns\s*\)/)
+  })
+
+  it('does not pass the bare `cur` React state into cursorLayout', () => {
+    // Any `cursorLayout(display, cur, columns)` invocation would
+    // reintroduce the stale-declaration window.
+    expect(source).not.toMatch(/cursorLayout\(\s*display\s*,\s*cur\s*,\s*columns\s*\)/)
+  })
+
+  it('keeps the fast-echo notifier calls paired with the stdout writes', () => {
+    // Both fast-echo paths must call noteCursorAdvance, otherwise Ink
+    // never learns about the out-of-band write and drifts again. We
+    // tolerate explanatory comments in between (the rationale block is
+    // intentionally long), but the pairing itself must hold.
+    const backspacePattern = /stdout!\.write\(['"`]\\b \\b['"`]\)[\s\S]{0,1000}?noteCursorAdvance\(-1\)/
+    expect(source).toMatch(backspacePattern)
+
+    const appendPattern = /stdout!\.write\(text\)[\s\S]{0,1000}?noteCursorAdvance\(text\.length\)/
+    expect(source).toMatch(appendPattern)
+  })
+})
diff --git a/ui-tui/src/__tests__/textInputFastEcho.test.ts b/ui-tui/src/__tests__/textInputFastEcho.test.ts
index 7f246f19f21..2e08111ffb4 100644
--- a/ui-tui/src/__tests__/textInputFastEcho.test.ts
+++ b/ui-tui/src/__tests__/textInputFastEcho.test.ts
@@ -133,4 +133,42 @@ describe('canFastBackspaceShape', () => {
   it('rejects deleting an emoji', () => {
     expect(canFastBackspaceShape('hi🙂', 'hi🙂'.length)).toBe(false)
   })
+
+  // Closes Copilot PR #26717 round 3: the "\b \b" sequence cannot move
+  // the terminal cursor onto the previous visual row across a
+  // soft-wrap boundary. When the caret sits at visual column 0 of a
+  // wrapped row (column == 0 in the computed cursor layout), backspace
+  // would leave the physical cursor in place while the logical caret
+  // moves up to the end of the previous visual line — desyncing both
+  // Ink's displayCursor model and the user-visible position. The fast
+  // path must fall through in that case so the normal Ink render path
+  // can lay out the correct cursor position.
+  it('rejects fast-backspace at a soft-wrap boundary when columns is known', () => {
+    // value width 6 in a column of 6 → cursorLayout produces (line 1, col 0)
+    // i.e. the caret has overflowed onto the next visual line.
+    const value = 'hello '
+    expect(canFastBackspaceShape(value, value.length, 6)).toBe(false)
+  })
+
+  it('rejects fast-backspace at an exact multiple of columns (wide wrap)', () => {
+    // 12 chars at width 6 → two full visual rows, caret at (line 2, col 0).
+    const value = 'abcdefghijkl'
+    expect(canFastBackspaceShape(value, value.length, 6)).toBe(false)
+  })
+
+  it('still accepts fast-backspace inside a wrapped line', () => {
+    // Caret mid-visual-line — "\b \b" can move the cursor one cell left
+    // without crossing a wrap boundary.
+    expect(canFastBackspaceShape('hello world', 'hello world'.length, 20)).toBe(true)
+    expect(canFastBackspaceShape('abcdefghi', 9, 6)).toBe(true) // visual line 1, col 3 → ok
+  })
+
+  it('skips the wrap-boundary check when columns is omitted (legacy contract)', () => {
+    // Callers that don't pass `columns` fall back to the pre-wrap-aware
+    // behavior — the function does NOT magically reject anything that
+    // could be a wrap boundary without the width. Production callers
+    // must always pass `columns`; this case is for unit tests of the
+    // pre-wrap shape contract.
+    expect(canFastBackspaceShape('hello ', 'hello '.length)).toBe(true)
+  })
 })
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index 91e109fa366..b3c79357368 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -16,13 +16,14 @@ import {
 
 type InkExt = typeof Ink & {
   stringWidth: (s: string) => number
+  useCursorAdvance: () => (dx: number, dy?: number) => void
   useDeclaredCursor: (a: { line: number; column: number; active: boolean }) => (el: any) => void
   useStdout: () => { stdout?: NodeJS.WriteStream }
   useTerminalFocus: () => boolean
 }
 
 const ink = Ink as unknown as InkExt
-const { Box, Text, useStdin, useInput, useStdout, stringWidth, useDeclaredCursor, useTerminalFocus } = ink
+const { Box, Text, useStdin, useInput, useStdout, stringWidth, useCursorAdvance, useDeclaredCursor, useTerminalFocus } = ink
 
 const ESC = '\x1b'
 const INV = `${ESC}[7m`
@@ -238,8 +239,26 @@ export function canFastAppendShape(
  * ASCII. Anything else (combining marks, IME compositions, wide chars,
  * tabs, ANSI fragments) goes through the normal render path so Ink can
  * recompute cell widths.
+ *
+ * When `columns` is supplied, ALSO rejects when the physical cursor
+ * sits at visual column 0 — i.e., right after a soft-wrap boundary.
+ * The "\b \b" sequence cannot move the cursor onto the previous visual
+ * row (terminals don't back-step across line wraps), so the physical
+ * cursor would stay put while the logical caret moves to the end of
+ * the previous visual line, desyncing both Ink's `displayCursor` model
+ * and the user-visible position.
+ *
+ * When `columns` is OMITTED, the wrap-boundary check is skipped
+ * entirely and the function reverts to the legacy non-wrap-aware
+ * contract — values like `'hello '` will return `true` even though
+ * they would be unsafe at a width of 6. Production callers (the
+ * composer's `canFastBackspace` helper) always pass `columns`;
+ * `columns` is optional only so unit tests of the pre-wrap shape
+ * contract can keep calling the helper without threading width
+ * through. Do NOT omit it from any new caller that relies on the
+ * wrap-boundary protection.
  */
-export function canFastBackspaceShape(current: string, cursor: number): boolean {
+export function canFastBackspaceShape(current: string, cursor: number, columns?: number): boolean {
   if (cursor !== current.length) {
     return false
   }
@@ -252,6 +271,13 @@ export function canFastBackspaceShape(current: string, cursor: number): boolean
     return false
   }
 
+  // If we know the wrap width, reject at the soft-wrap boundary: the
+  // caret's visual column is 0, so "\b \b" can't represent the physical
+  // move back to the previous visual line.
+  if (columns !== undefined && cursorLayout(current, cursor, columns).column === 0) {
+    return false
+  }
+
   const removed = current.slice(prevPos(current, cursor), cursor)
 
   return ASCII_PRINTABLE_RE.test(removed)
@@ -333,6 +359,7 @@ export function TextInput({
   const fwdDel = useFwdDelete(focus)
   const termFocus = useTerminalFocus()
   const { stdout } = useStdout()
+  const noteCursorAdvance = useCursorAdvance()
 
   const curRef = useRef(cur)
   const selRef = useRef<null | { end: number; start: number }>(null)
@@ -368,7 +395,19 @@ export function TextInput({
     [sel]
   )
 
-  const layout = useMemo(() => cursorLayout(display, cur, columns), [columns, cur, display])
+  // Read `curRef.current` (always up-to-date) rather than the `cur`
+  // React state. The fast-echo path defers the React `setCur` by 16ms
+  // to batch re-renders during heavy typing; if an unrelated render
+  // flushes this component during that window and we used the stale
+  // `cur` state here, the layout effect inside `useDeclaredCursor`
+  // would publish a stale cursor declaration and clobber the Ink-level
+  // bump from `noteCursorAdvance(...)`. `cur` is still in scope and
+  // referenced by setSel/setCur paths below, so React tracks the
+  // dependency naturally — we just don't use it as the source of truth
+  // for layout. The cursorLayout call is cheap (one wrap-text pass
+  // over a single-line string in the common case), so dropping useMemo
+  // is fine.
+  const layout = cursorLayout(display, curRef.current, columns)
 
   const boxRef = useDeclaredCursor({
     line: layout.line,
@@ -526,7 +565,7 @@ export function TextInput({
     canFastEchoBase() && canFastAppendShape(current, cursor, text, columns, lineWidthRef.current)
 
   const canFastBackspace = (current: string, cursor: number) =>
-    canFastEchoBase() && canFastBackspaceShape(current, cursor)
+    canFastEchoBase() && canFastBackspaceShape(current, cursor, columns)
 
   const commit = (
     next: string,
@@ -911,6 +950,12 @@ export function TextInput({
           v = v.slice(0, t) + v.slice(c)
           c = t
           stdout!.write('\b \b')
+          // The "\b \b" sequence ends with the cursor one column to the
+          // LEFT of where Ink last parked it. Tell Ink so its `displayCursor`
+          // (and log-update's relative-move basis on the next frame) stays
+          // in sync — otherwise the cursor parks one cell to the right of
+          // the caret on the next unrelated re-render.
+          noteCursorAdvance(-1)
           commit(v, c, true, false, false, Math.max(0, lineWidthRef.current - 1))
 
           return
@@ -998,6 +1043,14 @@ export function TextInput({
 
             if (simpleAppend) {
               stdout!.write(text)
+              // ASCII-printable text advances the physical cursor by exactly
+              // text.length cells (canFastAppendShape rejects non-ASCII,
+              // wide chars, newlines). Notify Ink so the cached displayCursor
+              // / log-update relative-move basis advances with it; otherwise
+              // any unrelated re-render that happens before the 16ms
+              // setCur/setParent flush parks the cursor text.length cells
+              // too far right (#cursor-drift).
+              noteCursorAdvance(text.length)
               commit(v, c, true, false, false, lineWidthRef.current + stringWidth(text))
 
               return
diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts
index b84f843d322..ca2a05dc449 100644
--- a/ui-tui/src/types/hermes-ink.d.ts
+++ b/ui-tui/src/types/hermes-ink.d.ts
@@ -164,6 +164,7 @@ declare module '@hermes/ink' {
     readonly column: number
     readonly active: boolean
   }): (el: unknown) => void
+  export function useCursorAdvance(): (dx: number, dy?: number) => void
   export function useStdin(): {
     readonly stdin: NodeJS.ReadStream
     readonly setRawMode: (value: boolean) => void

From 627f8a5f1dab2847a5fb97fa79daa6d0bc96d8bd Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 00:57:39 -0700
Subject: [PATCH 115/218] security: sanitize tool error strings before
 injecting into model context (#26823)

Adds _sanitize_tool_error() in model_tools and routes both error paths
through it: registry.dispatch's try/except (the primary path for tool
exceptions) and handle_function_call's outer except (defense in depth).

Stripping targets structural framing tokens that the model itself can
react to even though json.dumps already handles wire-layer escaping:
XML role tags (tool_call, function_call, result, response, output,
input, system, assistant, user), CDATA sections, and markdown code
fences. Caps message body at 2000 chars and wraps with [TOOL_ERROR]
prefix.

Defense-in-depth: a tool exception carrying '<tool_call>...' won't
break message framing (json escapes it), but the model still reads
those tokens and they nudge it toward role-confusion framing.

Ported from ironclaw#1639 (one piece of #3838's three-feature scout).
The truncated-tool-call (#1632) and empty-response-recovery (#1677,
#1720) pieces are skipped because main now implements both far more
thoroughly (run_agent.py L8147/L12209/L13012 for truncation retry +
length rewrite; L4500/L15090+ for empty-response scaffolding stripper,
multi-stage nudge, fallback model activation).
---
 model_tools.py                    |  45 +++++++++-
 tests/test_sanitize_tool_error.py | 137 ++++++++++++++++++++++++++++++
 tools/registry.py                 |  11 ++-
 3 files changed, 191 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_sanitize_tool_error.py

diff --git a/model_tools.py b/model_tools.py
index db19bb67e53..1cbc83096ac 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -21,6 +21,7 @@ Public API (signatures preserved from the original 2,400-line version):
 """
 
 import json
+import re
 import asyncio
 import logging
 import threading
@@ -485,6 +486,48 @@ _AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
 _READ_SEARCH_TOOLS = {"read_file", "search_files"}
 
 
+# =========================================================================
+# Tool error sanitization
+# =========================================================================
+#
+# Tool exceptions can carry arbitrary text into the model's context as the
+# `tool` message content. json.dumps() handles quote/backslash escaping so a
+# raw injection of `</tool_call>` won't break message framing, but the model
+# still *reads* those tokens and they can confuse downstream tool-call
+# parsing or, in adversarial cases, nudge it toward role-confusion framing.
+#
+# This helper strips structural framing tokens (XML role tags, CDATA,
+# markdown code fences) and caps the message at a sane upper bound before it
+# becomes part of the conversation. It's defense-in-depth — the json layer
+# already prevents framing escape — but cheap and worth having.
+#
+# Ported from ironclaw#1639.
+_TOOL_ERROR_ROLE_TAG_RE = re.compile(
+    r'</?(?:tool_call|function_call|result|response|output|input|system|assistant|user)>',
+    re.IGNORECASE,
+)
+_TOOL_ERROR_FENCE_OPEN_RE = re.compile(r'^\s*```(?:json|xml|html|markdown)?\s*', re.MULTILINE)
+_TOOL_ERROR_FENCE_CLOSE_RE = re.compile(r'\s*```\s*$', re.MULTILINE)
+_TOOL_ERROR_CDATA_RE = re.compile(r'<!\[CDATA\[.*?\]\]>', re.DOTALL)
+_TOOL_ERROR_MAX_LEN = 2000
+
+
+def _sanitize_tool_error(error_msg: str) -> str:
+    """Strip structural framing tokens from a tool error before showing it to the model.
+
+    See _TOOL_ERROR_ROLE_TAG_RE docstring above for rationale.
+    """
+    if not error_msg:
+        return "[TOOL_ERROR] "
+    sanitized = _TOOL_ERROR_ROLE_TAG_RE.sub("", error_msg)
+    sanitized = _TOOL_ERROR_FENCE_OPEN_RE.sub("", sanitized)
+    sanitized = _TOOL_ERROR_FENCE_CLOSE_RE.sub("", sanitized)
+    sanitized = _TOOL_ERROR_CDATA_RE.sub("", sanitized)
+    if len(sanitized) > _TOOL_ERROR_MAX_LEN:
+        sanitized = sanitized[:_TOOL_ERROR_MAX_LEN - 3] + "..."
+    return f"[TOOL_ERROR] {sanitized}"
+
+
 # =========================================================================
 # Tool argument type coercion
 # =========================================================================
@@ -824,7 +867,7 @@ def handle_function_call(
     except Exception as e:
         error_msg = f"Error executing {function_name}: {str(e)}"
         logger.exception(error_msg)
-        return json.dumps({"error": error_msg}, ensure_ascii=False)
+        return json.dumps({"error": _sanitize_tool_error(error_msg)}, ensure_ascii=False)
 
 
 # =============================================================================
diff --git a/tests/test_sanitize_tool_error.py b/tests/test_sanitize_tool_error.py
new file mode 100644
index 00000000000..3a0685bf3d7
--- /dev/null
+++ b/tests/test_sanitize_tool_error.py
@@ -0,0 +1,137 @@
+"""Tests for `_sanitize_tool_error` in model_tools.
+
+Ported from ironclaw#1639 — defense-in-depth on tool exception strings before
+they enter the model's `tool` message content. Note that `json.dumps()` in
+`handle_function_call` already handles quote/backslash escaping at the wire
+layer; this helper exists to strip structural framing tokens the model
+itself might react to (XML role tags, CDATA, markdown code fences) and to
+cap pathological lengths.
+"""
+from __future__ import annotations
+
+from model_tools import _sanitize_tool_error, _TOOL_ERROR_MAX_LEN
+
+
+class TestRoleTagStripping:
+    def test_strips_tool_call_tags(self):
+        out = _sanitize_tool_error("bad <tool_call>injected</tool_call> happened")
+        assert "<tool_call>" not in out
+        assert "</tool_call>" not in out
+        assert "bad injected happened" in out
+
+    def test_strips_function_call_tags(self):
+        out = _sanitize_tool_error("<function_call>x</function_call>")
+        assert "<function_call>" not in out
+        assert "</function_call>" not in out
+
+    def test_strips_role_tags(self):
+        # Each of these should be stripped
+        for tag in ("system", "assistant", "user", "result", "response", "output", "input"):
+            raw = f"prefix <{tag}>hi</{tag}> suffix"
+            out = _sanitize_tool_error(raw)
+            assert f"<{tag}>" not in out, f"failed to strip <{tag}>"
+            assert f"</{tag}>" not in out, f"failed to strip </{tag}>"
+
+    def test_role_tag_strip_is_case_insensitive(self):
+        out = _sanitize_tool_error("<TOOL_CALL>x</Tool_Call>")
+        assert "<" not in out.replace("[TOOL_ERROR]", "")  # only the prefix bracket survives
+
+    def test_unrelated_xml_kept(self):
+        # We intentionally only strip the role-like tag whitelist, not all XML
+        out = _sanitize_tool_error("Error parsing <ParseError>line 5</ParseError>")
+        assert "<ParseError>" in out
+
+
+class TestCDATAStripping:
+    def test_strips_cdata(self):
+        out = _sanitize_tool_error("error: <![CDATA[malicious]]> here")
+        assert "<![CDATA[" not in out
+        assert "]]>" not in out
+
+    def test_strips_multiline_cdata(self):
+        out = _sanitize_tool_error("a\n<![CDATA[line1\nline2]]>\nb")
+        assert "CDATA" not in out
+        assert "a" in out and "b" in out
+
+
+class TestCodeFenceStripping:
+    def test_strips_leading_fence_with_lang(self):
+        out = _sanitize_tool_error("```json\n{\"x\": 1}")
+        assert not out.replace("[TOOL_ERROR] ", "").startswith("```")
+
+    def test_strips_trailing_fence(self):
+        out = _sanitize_tool_error("payload\n```")
+        assert not out.rstrip().endswith("```")
+
+    def test_strips_bare_fence(self):
+        out = _sanitize_tool_error("```\nstuff")
+        assert "```" not in out.split("\n")[0]
+
+
+class TestTruncation:
+    def test_caps_long_input(self):
+        long = "A" * (_TOOL_ERROR_MAX_LEN * 2)
+        out = _sanitize_tool_error(long)
+        # Total length is prefix + truncated body
+        body = out[len("[TOOL_ERROR] "):]
+        assert len(body) == _TOOL_ERROR_MAX_LEN
+        assert body.endswith("...")
+
+    def test_does_not_truncate_short_input(self):
+        msg = "short error"
+        out = _sanitize_tool_error(msg)
+        assert "..." not in out
+        assert msg in out
+
+
+class TestEnvelope:
+    def test_wraps_with_prefix(self):
+        out = _sanitize_tool_error("oh no")
+        assert out.startswith("[TOOL_ERROR] ")
+
+    def test_empty_input(self):
+        out = _sanitize_tool_error("")
+        assert out == "[TOOL_ERROR] "
+
+    def test_preserves_normal_error_text(self):
+        msg = "Error executing read_file: FileNotFoundError: /tmp/missing"
+        out = _sanitize_tool_error(msg)
+        assert msg in out
+
+
+class TestHandleFunctionCallIntegration:
+    """Verify handle_function_call routes exception-path errors through the sanitizer.
+
+    Note: the "Unknown tool: ..." early-return in tools/registry.py is a
+    *different* code path from `except Exception` in handle_function_call —
+    that one returns directly without sanitization (and there's nothing to
+    sanitize in a hardcoded format string anyway). This test exercises the
+    real exception path by passing args that make a known tool raise.
+    """
+
+    def test_exception_path_error_is_sanitized(self):
+        import json
+        from model_tools import handle_function_call
+        from tools.registry import registry as _registry
+
+        # Force a known tool to raise with a payload containing role tags.
+        def boom(_args, **_kwargs):
+            raise RuntimeError("<tool_call>injected</tool_call> boom")
+
+        all_tools = _registry.get_all_tool_names()
+        assert all_tools, "no tools registered — test environment broken"
+        target = all_tools[0]
+        original = _registry._tools[target].handler
+        _registry._tools[target].handler = boom
+        try:
+            result_str = handle_function_call(target, {})
+        finally:
+            _registry._tools[target].handler = original
+
+        payload = json.loads(result_str)
+        assert "error" in payload, payload
+        assert payload["error"].startswith("[TOOL_ERROR] "), payload["error"]
+        # Role-tag stripping carried through
+        assert "<tool_call>" not in payload["error"]
+        assert "</tool_call>" not in payload["error"]
+        assert "boom" in payload["error"]
diff --git a/tools/registry.py b/tools/registry.py
index 2639eac74ed..7bb92e85f96 100644
--- a/tools/registry.py
+++ b/tools/registry.py
@@ -404,7 +404,16 @@ class ToolRegistry:
             return entry.handler(args, **kwargs)
         except Exception as e:
             logger.exception("Tool %s dispatch error: %s", name, e)
-            return json.dumps({"error": f"Tool execution failed: {type(e).__name__}: {e}"})
+            # Route through the sanitizer so framing tokens / CDATA / fences
+            # in exception strings don't reach the model as structural noise.
+            # See model_tools._sanitize_tool_error for rationale.
+            raw = f"Tool execution failed: {type(e).__name__}: {e}"
+            try:
+                from model_tools import _sanitize_tool_error
+                sanitized = _sanitize_tool_error(raw)
+            except Exception:
+                sanitized = raw  # defensive: never let the sanitizer block error propagation
+            return json.dumps({"error": sanitized})
 
     # ------------------------------------------------------------------
     # Query helpers  (replace redundant dicts in model_tools.py)

From 74d0b392e7a87c869d9e13cf3eba5d809d8ff1fa Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 00:58:27 -0700
Subject: [PATCH 116/218] feat(x_search): gated X (Twitter) search tool with
 OAuth-or-API-key auth (#26763)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(x_search): gated X (Twitter) search tool with OAuth-or-API-key auth

Salvages tools/x_search_tool.py from the closed PR #10786 (originally by
@Jaaneek) and reworks its credential resolution so the tool registers
when EITHER xAI credential path is available:

* XAI_API_KEY (paid xAI API key) is set in ~/.hermes/.env or the env, OR
* The user is signed in via xAI Grok OAuth — SuperGrok subscription —
  i.e. hermes auth add xai-oauth has been run

Both paths route through xAI's built-in x_search Responses tool at
https://api.x.ai/v1/responses. When both credentials exist OAuth wins,
matching tools/xai_http.py's existing preference order (uses SuperGrok
quota instead of paid API spend).

The check_fn calls resolve_xai_http_credentials() which auto-refreshes
the OAuth access token if it's within the refresh skew window, so a
True return means the bearer is fetchable AND non-empty.

Wiring
- tools/x_search_tool.py — new tool, ~370 LOC. Schema gated by check_fn,
  bearer resolved per-call so revoked OAuth surfaces a clean tool_error
  rather than an HTTP 401.
- toolsets.py — "x_search" toolset def. NOT added to _HERMES_CORE_TOOLS;
  users opt in via hermes tools.
- hermes_cli/tools_config.py — CONFIGURABLE_TOOLSETS entry + TOOL_CATEGORIES
  block with two provider options (OAuth + API key) sharing the existing
  xai_grok post_setup hook for credential bootstrap.
- hermes_cli/config.py — DEFAULT_CONFIG["x_search"] with model /
  timeout_seconds / retries. Additive nested key; no version bump.
- tests/tools/test_x_search_tool.py — 13 tests covering HTTP shape,
  handle validation, citation extraction, 4xx/5xx/timeout handling,
  and the full credential-resolution matrix (OAuth-only, API-key-only,
  both-set, neither-set, resolver-raises, config overrides, registry
  registration).
- website/docs/guides/xai-grok-oauth.md — adds X Search to the
  direct-to-xAI tools section with off-by-default note.
- website/docs/user-guide/features/tools.md — new row in the tools table.

Off by default — users enable via `hermes tools` → 🐦 X (Twitter) Search.
Schema only appears to the model when xAI credentials are configured.

Co-authored-by: Jaaneek <Jaaneek@users.noreply.github.com>

* docs(x_search): add dedicated feature page + reference entries

- website/docs/user-guide/features/x-search.md (new) — full feature
  walkthrough: authentication, enablement, configuration, parameters,
  returned fields, example, troubleshooting, see-also links.
- website/docs/reference/tools-reference.md — new "x_search" toolset
  section with parameter docs and credential gating note.
- website/docs/reference/toolsets-reference.md — new row in the
  toolset catalog table.
- website/sidebars.ts — wires the new feature page under
  Media & Web, after web-search.

---------

Co-authored-by: Jaaneek <Jaaneek@users.noreply.github.com>
---
 hermes_cli/config.py                         |  17 +
 hermes_cli/tools_config.py                   |  41 +-
 tests/tools/test_x_search_tool.py            | 438 +++++++++++++++++++
 tools/x_search_tool.py                       | 424 ++++++++++++++++++
 toolsets.py                                  |  11 +
 website/docs/guides/xai-grok-oauth.md        |   7 +-
 website/docs/reference/tools-reference.md    |   6 +
 website/docs/reference/toolsets-reference.md |   1 +
 website/docs/user-guide/features/tools.md    |   1 +
 website/docs/user-guide/features/x-search.md | 117 +++++
 website/sidebars.ts                          |   1 +
 11 files changed, 1062 insertions(+), 2 deletions(-)
 create mode 100644 tests/tools/test_x_search_tool.py
 create mode 100644 tools/x_search_tool.py
 create mode 100644 website/docs/user-guide/features/x-search.md

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 508de0d3faa..5419ef92b4c 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1603,6 +1603,23 @@ DEFAULT_CONFIG = {
         "servers": {},
     },
 
+    # X (Twitter) Search via xAI's built-in x_search Responses tool.
+    # The tool registers when xAI credentials are available (SuperGrok
+    # OAuth or XAI_API_KEY) AND the x_search toolset is enabled in
+    # `hermes tools`. These settings tune the backing Responses API call.
+    "x_search": {
+        # xAI model used for the Responses call. grok-4.20-reasoning is
+        # the recommended default; any Grok model with x_search tool
+        # access works.
+        "model": "grok-4.20-reasoning",
+        # Request timeout in seconds (minimum 30). x_search can take
+        # 60-120s for complex queries — the default is generous.
+        "timeout_seconds": 180,
+        # Number of automatic retries on 5xx / ReadTimeout / ConnectionError.
+        # Each retry backs off (1.5x attempt seconds, capped at 5s).
+        "retries": 2,
+    },
+
     # Config schema version - bump this when adding new required fields
     "_config_version": 23,
 }
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 377194589ea..074bd04aa64 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -61,6 +61,7 @@ CONFIGURABLE_TOOLSETS = [
     ("video",           "🎬 Video Analysis",            "video_analyze (requires video-capable model)"),
     ("image_gen",       "🎨 Image Generation",          "image_generate"),
     ("video_gen",       "🎬 Video Generation",          "video_generate (text-to-video + image-to-video)"),
+    ("x_search",        "🐦 X (Twitter) Search",        "x_search (requires xAI OAuth or XAI_API_KEY)"),
     ("moa",             "🧠 Mixture of Agents",         "mixture_of_agents"),
     ("tts",             "🔊 Text-to-Speech",            "text_to_speech"),
     ("skills",          "📚 Skills",                    "list, view, manage"),
@@ -86,7 +87,12 @@ CONFIGURABLE_TOOLSETS = [
 # Video gen is off by default — it's a niche, paid, slow feature. Users
 # who want it opt in via `hermes tools` → Video Generation, which walks
 # them through provider + model selection.
-_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen"}
+#
+# X search is off by default — gated on xAI credentials (SuperGrok OAuth
+# or XAI_API_KEY). Users opt in via `hermes tools` → X (Twitter) Search,
+# which walks them through credential setup. The tool's check_fn means
+# the schema won't appear to the model even if enabled without credentials.
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen", "x_search"}
 
 # Platform-scoped toolsets: only appear in the `hermes tools` checklist for
 # these platforms, and only resolve/save for these platforms.  A toolset
@@ -308,6 +314,39 @@ TOOL_CATEGORIES = {
         # converge image_gen toward.
         "providers": [],
     },
+    "x_search": {
+        "name": "X (Twitter) Search",
+        "setup_title": "Select xAI Credential Source",
+        "setup_note": (
+            "Hermes routes X searches through xAI's built-in x_search "
+            "Responses tool. Both credential sources hit the same "
+            "https://api.x.ai/v1/responses endpoint — pick whichever you "
+            "already have. SuperGrok OAuth is preferred when both are set "
+            "(uses your subscription quota instead of API spend)."
+        ),
+        "icon": "🐦",
+        "providers": [
+            {
+                "name": "xAI Grok OAuth (SuperGrok Subscription)",
+                "badge": "subscription",
+                "tag": "Browser login at accounts.x.ai — no API key required",
+                "env_vars": [],
+                "post_setup": "xai_grok",
+            },
+            {
+                "name": "xAI API key",
+                "badge": "paid",
+                "tag": "Direct xAI API billing via XAI_API_KEY",
+                "env_vars": [
+                    {
+                        "key": "XAI_API_KEY",
+                        "prompt": "xAI API key",
+                        "url": "https://console.x.ai/",
+                    },
+                ],
+            },
+        ],
+    },
     "browser": {
         "name": "Browser Automation",
         "icon": "🌐",
diff --git a/tests/tools/test_x_search_tool.py b/tests/tools/test_x_search_tool.py
new file mode 100644
index 00000000000..7cbc4841a8a
--- /dev/null
+++ b/tests/tools/test_x_search_tool.py
@@ -0,0 +1,438 @@
+"""Tests for the X (Twitter) Search tool backed by xAI Responses API.
+
+Covers:
+- HTTP request shape (URL, headers, payload, model from config)
+- Handle filter validation (allowed vs excluded mutual exclusion)
+- Inline url_citation extraction from message annotations
+- Structured error handling (4xx with code, 5xx retry, ReadTimeout retry)
+- Credential resolution: API key path, OAuth path, both-set preference, none-set
+- check_x_search_requirements gating in registry
+"""
+
+import json
+
+import requests
+
+
+class _FakeResponse:
+    def __init__(self, payload, *, status_code=200, text=None):
+        self._payload = payload
+        self.status_code = status_code
+        self.text = text if text is not None else json.dumps(payload)
+
+    def raise_for_status(self):
+        if self.status_code >= 400:
+            err = requests.HTTPError(f"{self.status_code} Client Error")
+            err.response = self
+            raise err
+
+    def json(self):
+        return self._payload
+
+
+# ---------------------------------------------------------------------------
+# Original PR #10786 test coverage (HTTP shape, handle validation, citations,
+# retry behavior) — preserved verbatim. Uses XAI_API_KEY env var via the
+# default resolver path.
+# ---------------------------------------------------------------------------
+
+def test_x_search_posts_responses_request(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+    from hermes_cli import __version__
+
+    captured = {}
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        captured["url"] = url
+        captured["headers"] = headers
+        captured["json"] = json
+        captured["timeout"] = timeout
+        return _FakeResponse(
+            {
+                "output_text": "People on X are discussing xAI's latest launch.",
+                "citations": [{"url": "https://x.com/example/status/1", "title": "Example post"}],
+            }
+        )
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr("requests.post", _fake_post)
+
+    result = json.loads(
+        x_search_tool(
+            query="What are people saying about xAI on X?",
+            allowed_x_handles=["xai", "@grok"],
+            from_date="2026-04-01",
+            to_date="2026-04-10",
+            enable_image_understanding=True,
+        )
+    )
+
+    tool_def = captured["json"]["tools"][0]
+    assert captured["url"] == "https://api.x.ai/v1/responses"
+    assert captured["headers"]["User-Agent"] == f"Hermes-Agent/{__version__}"
+    assert captured["json"]["model"] == "grok-4.20-reasoning"
+    assert captured["json"]["store"] is False
+    assert tool_def["type"] == "x_search"
+    assert tool_def["allowed_x_handles"] == ["xai", "grok"]
+    assert tool_def["from_date"] == "2026-04-01"
+    assert tool_def["to_date"] == "2026-04-10"
+    assert tool_def["enable_image_understanding"] is True
+    assert result["success"] is True
+    assert result["answer"] == "People on X are discussing xAI's latest launch."
+
+
+def test_x_search_rejects_conflicting_handle_filters(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+    result = json.loads(
+        x_search_tool(
+            query="latest xAI discussion",
+            allowed_x_handles=["xai"],
+            excluded_x_handles=["grok"],
+        )
+    )
+
+    assert result["error"] == "allowed_x_handles and excluded_x_handles cannot be used together"
+
+
+def test_x_search_extracts_inline_url_citations(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        return _FakeResponse(
+            {
+                "output": [
+                    {
+                        "type": "message",
+                        "content": [
+                            {
+                                "type": "output_text",
+                                "text": "xAI posted an update on X.",
+                                "annotations": [
+                                    {
+                                        "type": "url_citation",
+                                        "url": "https://x.com/xai/status/123",
+                                        "title": "xAI update",
+                                        "start_index": 0,
+                                        "end_index": 3,
+                                    }
+                                ],
+                            }
+                        ],
+                    }
+                ]
+            }
+        )
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr("requests.post", _fake_post)
+
+    result = json.loads(x_search_tool(query="latest post from xai"))
+
+    assert result["success"] is True
+    assert result["answer"] == "xAI posted an update on X."
+    assert result["inline_citations"] == [
+        {
+            "url": "https://x.com/xai/status/123",
+            "title": "xAI update",
+            "start_index": 0,
+            "end_index": 3,
+        }
+    ]
+
+
+def test_x_search_returns_structured_http_error(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    class _FailingResponse:
+        status_code = 403
+        text = '{"code":"forbidden","error":"x_search is not enabled for this model"}'
+
+        def json(self):
+            return {
+                "code": "forbidden",
+                "error": "x_search is not enabled for this model",
+            }
+
+        def raise_for_status(self):
+            err = requests.HTTPError("403 Client Error: Forbidden")
+            err.response = self
+            raise err
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr("requests.post", lambda *a, **k: _FailingResponse())
+
+    result = json.loads(x_search_tool(query="latest xai discussion"))
+
+    assert result["success"] is False
+    assert result["provider"] == "xai"
+    assert result["tool"] == "x_search"
+    assert result["error_type"] == "HTTPError"
+    assert result["error"] == "forbidden: x_search is not enabled for this model"
+
+
+def test_x_search_retries_read_timeout_then_succeeds(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    calls = {"count": 0}
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        calls["count"] += 1
+        if calls["count"] == 1:
+            raise requests.ReadTimeout("timed out")
+        return _FakeResponse(
+            {
+                "output_text": "Recovered after retry.",
+                "citations": [],
+            }
+        )
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr("requests.post", _fake_post)
+    monkeypatch.setattr("tools.x_search_tool.time.sleep", lambda *_: None)
+
+    result = json.loads(x_search_tool(query="grok xai"))
+
+    assert calls["count"] == 2
+    assert result["success"] is True
+    assert result["answer"] == "Recovered after retry."
+
+
+def test_x_search_retries_5xx_then_succeeds(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    calls = {"count": 0}
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        calls["count"] += 1
+        if calls["count"] == 1:
+            return _FakeResponse(
+                {"code": "Internal error", "error": "Service temporarily unavailable."},
+                status_code=500,
+            )
+        return _FakeResponse({"output_text": "Recovered after 5xx retry."})
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr("requests.post", _fake_post)
+    monkeypatch.setattr("tools.x_search_tool.time.sleep", lambda *_: None)
+
+    result = json.loads(x_search_tool(query="grok xai"))
+
+    assert calls["count"] == 2
+    assert result["success"] is True
+    assert result["answer"] == "Recovered after 5xx retry."
+
+
+# ---------------------------------------------------------------------------
+# Credential-resolution coverage — the OAuth-or-API-key gating contract.
+# ---------------------------------------------------------------------------
+
+def _no_xai_env(monkeypatch):
+    """Strip any XAI_* env vars so the resolver doesn't see a leaked dev key."""
+    for var in ("XAI_API_KEY", "XAI_BASE_URL", "HERMES_XAI_BASE_URL"):
+        monkeypatch.delenv(var, raising=False)
+
+
+def test_x_search_uses_xai_oauth_when_only_oauth_available(monkeypatch):
+    """OAuth-only user: credential_source should be ``xai-oauth``."""
+    from tools.registry import invalidate_check_fn_cache
+    from tools.x_search_tool import check_x_search_requirements, x_search_tool
+
+    _no_xai_env(monkeypatch)
+
+    def _fake_resolve():
+        return {
+            "provider": "xai-oauth",
+            "api_key": "oauth-bearer-token",
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "tools.x_search_tool.resolve_xai_http_credentials", _fake_resolve
+    )
+    invalidate_check_fn_cache()
+
+    assert check_x_search_requirements() is True
+
+    captured = {}
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        captured["headers"] = headers
+        return _FakeResponse({"output_text": "Found posts via OAuth."})
+
+    monkeypatch.setattr("requests.post", _fake_post)
+
+    result = json.loads(x_search_tool(query="anything about xai"))
+
+    assert result["success"] is True
+    assert result["credential_source"] == "xai-oauth"
+    assert captured["headers"]["Authorization"] == "Bearer oauth-bearer-token"
+
+
+def test_x_search_uses_api_key_when_only_xai_api_key_set(monkeypatch):
+    """API-key-only user: credential_source should be ``xai``."""
+    from tools.registry import invalidate_check_fn_cache
+    from tools.x_search_tool import check_x_search_requirements, x_search_tool
+
+    _no_xai_env(monkeypatch)
+
+    def _fake_resolve():
+        # Real ``resolve_xai_http_credentials`` returns ``"xai"`` when it
+        # falls through to the XAI_API_KEY env var path.
+        return {
+            "provider": "xai",
+            "api_key": "raw-api-key",
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "tools.x_search_tool.resolve_xai_http_credentials", _fake_resolve
+    )
+    invalidate_check_fn_cache()
+
+    assert check_x_search_requirements() is True
+
+    captured = {}
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        captured["headers"] = headers
+        return _FakeResponse({"output_text": "Found posts via API key."})
+
+    monkeypatch.setattr("requests.post", _fake_post)
+
+    result = json.loads(x_search_tool(query="anything"))
+
+    assert result["success"] is True
+    assert result["credential_source"] == "xai"
+    assert captured["headers"]["Authorization"] == "Bearer raw-api-key"
+
+
+def test_x_search_prefers_oauth_when_both_available(monkeypatch):
+    """Both credentials present: OAuth wins (matches Teknium's billing preference).
+
+    The real ordering is implemented in ``tools.xai_http.resolve_xai_http_credentials``
+    — OAuth runtime first, fallback OAuth resolver second, ``XAI_API_KEY`` third.
+    This test exercises the contract by having the resolver return the OAuth
+    bearer (the ``xai-oauth`` ``provider`` tag is the marker).
+    """
+    from tools.registry import invalidate_check_fn_cache
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "raw-api-key")
+
+    # Mimic xai_http's preference: OAuth wins, so we return the OAuth tuple
+    # even though XAI_API_KEY is also set.
+    def _fake_resolve():
+        return {
+            "provider": "xai-oauth",
+            "api_key": "oauth-bearer-token",
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "tools.x_search_tool.resolve_xai_http_credentials", _fake_resolve
+    )
+    invalidate_check_fn_cache()
+
+    captured = {}
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        captured["headers"] = headers
+        return _FakeResponse({"output_text": "OAuth preferred."})
+
+    monkeypatch.setattr("requests.post", _fake_post)
+
+    result = json.loads(x_search_tool(query="anything"))
+
+    assert result["credential_source"] == "xai-oauth"
+    assert captured["headers"]["Authorization"] == "Bearer oauth-bearer-token"
+
+
+def test_x_search_returns_tool_error_when_no_credentials(monkeypatch):
+    """No credentials anywhere: tool returns a clear error, not a 401 from xAI."""
+    from tools.registry import invalidate_check_fn_cache
+    from tools.x_search_tool import check_x_search_requirements, x_search_tool
+
+    _no_xai_env(monkeypatch)
+
+    def _fake_resolve():
+        return {
+            "provider": "xai",
+            "api_key": "",
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "tools.x_search_tool.resolve_xai_http_credentials", _fake_resolve
+    )
+    invalidate_check_fn_cache()
+
+    assert check_x_search_requirements() is False
+
+    # If a model somehow invokes the tool despite a False check_fn, the call
+    # surfaces a friendly error rather than an HTTP exception.
+    result = x_search_tool(query="anything")
+    assert "No xAI credentials available" in result
+    assert "hermes auth add xai-oauth" in result
+
+
+def test_x_search_check_fn_false_when_resolver_raises(monkeypatch):
+    """Resolver exceptions (e.g. expired token + failed refresh) gate the tool out."""
+    from tools.registry import invalidate_check_fn_cache
+    from tools.x_search_tool import check_x_search_requirements
+
+    _no_xai_env(monkeypatch)
+
+    def _boom():
+        raise RuntimeError("token revoked and refresh failed")
+
+    monkeypatch.setattr(
+        "tools.x_search_tool.resolve_xai_http_credentials", _boom
+    )
+    invalidate_check_fn_cache()
+
+    assert check_x_search_requirements() is False
+
+
+def test_x_search_honors_config_model_and_timeout(monkeypatch, tmp_path):
+    """``x_search.model`` and ``x_search.timeout_seconds`` override the defaults."""
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+    # Patch the in-module config loader so tests don't touch ~/.hermes/config.yaml.
+    monkeypatch.setattr(
+        "tools.x_search_tool._load_x_search_config",
+        lambda: {"model": "grok-custom-test", "timeout_seconds": 45, "retries": 0},
+    )
+
+    captured = {}
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        captured["model"] = json["model"]
+        captured["timeout"] = timeout
+        return _FakeResponse({"output_text": "Custom model OK."})
+
+    monkeypatch.setattr("requests.post", _fake_post)
+
+    result = json.loads(x_search_tool(query="anything"))
+
+    assert result["success"] is True
+    assert captured["model"] == "grok-custom-test"
+    assert captured["timeout"] == 45
+
+
+def test_x_search_registered_in_registry_with_check_fn():
+    """The tool is registered under the x_search toolset with the gating check_fn."""
+    import tools.x_search_tool  # noqa: F401 — ensures registration runs
+    from tools.registry import registry
+
+    entry = registry.get_entry("x_search")
+    assert entry is not None
+    assert entry.toolset == "x_search"
+    assert entry.check_fn is not None
+    assert entry.check_fn.__name__ == "check_x_search_requirements"
+    assert "XAI_API_KEY" in entry.requires_env
+    assert entry.emoji == "🐦"
diff --git a/tools/x_search_tool.py b/tools/x_search_tool.py
new file mode 100644
index 00000000000..8b242ee0ca8
--- /dev/null
+++ b/tools/x_search_tool.py
@@ -0,0 +1,424 @@
+#!/usr/bin/env python3
+"""X Search tool backed by xAI's built-in ``x_search`` Responses API tool.
+
+Authentication
+--------------
+The tool registers when **either** xAI credential path is available:
+
+* ``XAI_API_KEY`` is set in ``~/.hermes/.env`` or the process environment
+  (paid xAI API key), OR
+* The user is signed in via xAI Grok OAuth — SuperGrok subscription —
+  i.e. ``hermes auth add xai-oauth`` has been run and the stored refresh
+  token still works.
+
+Credential preference at call time matches
+:func:`tools.xai_http.resolve_xai_http_credentials`: SuperGrok OAuth first,
+direct OAuth resolver second, ``XAI_API_KEY`` last. That helper also
+auto-refreshes the OAuth access token when it's within the refresh skew
+window, so a ``True`` from :func:`check_x_search_requirements` means the
+bearer is fetchable AND non-empty.
+
+Salvaged from PR #10786 (originally by @Jaaneek); credential resolution
+reworked to honor both auth modes per Teknium's design.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+from typing import Any, Dict, List, Optional, Tuple
+
+import requests
+
+from tools.registry import registry, tool_error
+from tools.xai_http import hermes_xai_user_agent, resolve_xai_http_credentials
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
+DEFAULT_X_SEARCH_MODEL = "grok-4.20-reasoning"
+DEFAULT_X_SEARCH_TIMEOUT_SECONDS = 180
+DEFAULT_X_SEARCH_RETRIES = 2
+MAX_HANDLES = 10
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+def _load_x_search_config() -> Dict[str, Any]:
+    try:
+        from hermes_cli.config import load_config
+
+        return load_config().get("x_search", {}) or {}
+    except Exception:
+        return {}
+
+
+def _get_x_search_model() -> str:
+    cfg = _load_x_search_config()
+    return (str(cfg.get("model") or "").strip() or DEFAULT_X_SEARCH_MODEL)
+
+
+def _get_x_search_timeout_seconds() -> int:
+    cfg = _load_x_search_config()
+    raw_value = cfg.get("timeout_seconds", DEFAULT_X_SEARCH_TIMEOUT_SECONDS)
+    try:
+        return max(30, int(raw_value))
+    except Exception:
+        return DEFAULT_X_SEARCH_TIMEOUT_SECONDS
+
+
+def _get_x_search_retries() -> int:
+    cfg = _load_x_search_config()
+    raw_value = cfg.get("retries", DEFAULT_X_SEARCH_RETRIES)
+    try:
+        return max(0, int(raw_value))
+    except Exception:
+        return DEFAULT_X_SEARCH_RETRIES
+
+
+# ---------------------------------------------------------------------------
+# Credential resolution
+# ---------------------------------------------------------------------------
+
+def _resolve_xai_bearer() -> Tuple[str, str, str]:
+    """Return ``(api_key, base_url, source)``.
+
+    ``source`` is one of ``"xai-oauth"`` or ``"xai"`` so callers (and tests)
+    can tell which credential path won. Raises ``RuntimeError`` if no usable
+    credential is available — the registered :func:`check_x_search_requirements`
+    gate makes that case unreachable in normal operation, but the runtime
+    check exists so a credential that expires between registration and
+    invocation produces a clean tool error instead of a 401.
+    """
+    creds = resolve_xai_http_credentials()
+    api_key = str(creds.get("api_key") or "").strip()
+    if not api_key:
+        raise RuntimeError(
+            "No xAI credentials available. Run `hermes auth add xai-oauth` "
+            "to sign in with your SuperGrok subscription, or set XAI_API_KEY."
+        )
+    base_url = str(creds.get("base_url") or DEFAULT_XAI_BASE_URL).strip().rstrip("/")
+    source = str(creds.get("provider") or "xai")
+    return api_key, base_url, source
+
+
+def check_x_search_requirements() -> bool:
+    """Return True when xAI credentials are available AND valid.
+
+    ``resolve_xai_http_credentials`` calls
+    :func:`hermes_cli.auth.resolve_xai_oauth_runtime_credentials` which
+    auto-refreshes the OAuth access token if it's expiring; a successful
+    return therefore implies a usable bearer.
+    """
+    try:
+        creds = resolve_xai_http_credentials()
+        return bool(str(creds.get("api_key") or "").strip())
+    except Exception:
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _normalize_handles(handles: Optional[List[str]], field_name: str) -> List[str]:
+    cleaned: List[str] = []
+    for handle in handles or []:
+        normalized = str(handle or "").strip().lstrip("@")
+        if normalized:
+            cleaned.append(normalized)
+    if len(cleaned) > MAX_HANDLES:
+        raise ValueError(f"{field_name} supports at most {MAX_HANDLES} handles")
+    return cleaned
+
+
+def _extract_response_text(payload: Dict[str, Any]) -> str:
+    output_text = str(payload.get("output_text") or "").strip()
+    if output_text:
+        return output_text
+
+    parts: List[str] = []
+    for item in payload.get("output", []) or []:
+        if item.get("type") != "message":
+            continue
+        for content in item.get("content", []) or []:
+            ctype = content.get("type")
+            if ctype in ("output_text", "text"):
+                text = str(content.get("text") or "").strip()
+                if text:
+                    parts.append(text)
+    return "\n\n".join(parts).strip()
+
+
+def _extract_inline_citations(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
+    citations: List[Dict[str, Any]] = []
+    for item in payload.get("output", []) or []:
+        if item.get("type") != "message":
+            continue
+        for content in item.get("content", []) or []:
+            for annotation in content.get("annotations", []) or []:
+                if annotation.get("type") != "url_citation":
+                    continue
+                citations.append(
+                    {
+                        "url": annotation.get("url", ""),
+                        "title": annotation.get("title", ""),
+                        "start_index": annotation.get("start_index"),
+                        "end_index": annotation.get("end_index"),
+                    }
+                )
+    return citations
+
+
+def _http_error_message(exc: requests.HTTPError) -> str:
+    response = getattr(exc, "response", None)
+    if response is None:
+        return str(exc)
+
+    try:
+        payload = response.json()
+    except Exception:
+        payload = None
+
+    if isinstance(payload, dict):
+        code = str(payload.get("code") or "").strip()
+        error = str(payload.get("error") or "").strip()
+        message = error or str(payload)
+        if code and code not in message:
+            message = f"{code}: {message}"
+        return message or str(exc)
+
+    text = str(getattr(response, "text", "") or "").strip()
+    if text:
+        return text[:500]
+    return str(exc)
+
+
+# ---------------------------------------------------------------------------
+# Tool implementation
+# ---------------------------------------------------------------------------
+
+def x_search_tool(
+    query: str,
+    allowed_x_handles: Optional[List[str]] = None,
+    excluded_x_handles: Optional[List[str]] = None,
+    from_date: str = "",
+    to_date: str = "",
+    enable_image_understanding: bool = False,
+    enable_video_understanding: bool = False,
+) -> str:
+    if not query or not query.strip():
+        return tool_error("query is required for x_search")
+
+    try:
+        api_key, base_url, source = _resolve_xai_bearer()
+    except RuntimeError as exc:
+        return tool_error(str(exc))
+
+    try:
+        allowed = _normalize_handles(allowed_x_handles, "allowed_x_handles")
+        excluded = _normalize_handles(excluded_x_handles, "excluded_x_handles")
+        if allowed and excluded:
+            return tool_error("allowed_x_handles and excluded_x_handles cannot be used together")
+
+        tool_def: Dict[str, Any] = {"type": "x_search"}
+        if allowed:
+            tool_def["allowed_x_handles"] = allowed
+        if excluded:
+            tool_def["excluded_x_handles"] = excluded
+        if from_date.strip():
+            tool_def["from_date"] = from_date.strip()
+        if to_date.strip():
+            tool_def["to_date"] = to_date.strip()
+        if enable_image_understanding:
+            tool_def["enable_image_understanding"] = True
+        if enable_video_understanding:
+            tool_def["enable_video_understanding"] = True
+
+        payload = {
+            "model": _get_x_search_model(),
+            "input": [
+                {
+                    "role": "user",
+                    "content": query.strip(),
+                }
+            ],
+            "tools": [tool_def],
+            "store": False,
+        }
+
+        timeout_seconds = _get_x_search_timeout_seconds()
+        max_retries = _get_x_search_retries()
+        response: Optional[requests.Response] = None
+        for attempt in range(max_retries + 1):
+            try:
+                response = requests.post(
+                    f"{base_url}/responses",
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                        "User-Agent": hermes_xai_user_agent(),
+                    },
+                    json=payload,
+                    timeout=timeout_seconds,
+                )
+                response.raise_for_status()
+                break
+            except requests.HTTPError as e:
+                status_code = getattr(getattr(e, "response", None), "status_code", None)
+                if status_code is None or status_code < 500 or attempt >= max_retries:
+                    raise
+                logger.warning(
+                    "x_search upstream failure on attempt %s/%s: %s",
+                    attempt + 1,
+                    max_retries + 1,
+                    _http_error_message(e),
+                )
+                time.sleep(min(5.0, 1.5 * (attempt + 1)))
+            except (requests.ReadTimeout, requests.ConnectionError) as e:
+                if attempt >= max_retries:
+                    raise
+                logger.warning(
+                    "x_search transient failure on attempt %s/%s: %s",
+                    attempt + 1,
+                    max_retries + 1,
+                    e,
+                )
+                time.sleep(min(5.0, 1.5 * (attempt + 1)))
+
+        if response is None:
+            raise RuntimeError("x_search request did not return a response")
+
+        data = response.json()
+
+        answer = _extract_response_text(data)
+        citations = list(data.get("citations") or [])
+        inline_citations = _extract_inline_citations(data)
+
+        return json.dumps(
+            {
+                "success": True,
+                "provider": "xai",
+                "credential_source": source,
+                "tool": "x_search",
+                "model": payload["model"],
+                "query": query.strip(),
+                "answer": answer,
+                "citations": citations,
+                "inline_citations": inline_citations,
+            },
+            ensure_ascii=False,
+        )
+    except requests.HTTPError as e:
+        logger.error("x_search failed: %s", e, exc_info=True)
+        return json.dumps(
+            {
+                "success": False,
+                "provider": "xai",
+                "tool": "x_search",
+                "error": _http_error_message(e),
+                "error_type": type(e).__name__,
+            },
+            ensure_ascii=False,
+        )
+    except requests.ReadTimeout as e:
+        logger.error("x_search timed out: %s", e, exc_info=True)
+        return json.dumps(
+            {
+                "success": False,
+                "provider": "xai",
+                "tool": "x_search",
+                "error": f"xAI x_search timed out after {_get_x_search_timeout_seconds()} seconds",
+                "error_type": type(e).__name__,
+            },
+            ensure_ascii=False,
+        )
+    except Exception as e:
+        logger.error("x_search failed: %s", e, exc_info=True)
+        return json.dumps(
+            {
+                "success": False,
+                "provider": "xai",
+                "tool": "x_search",
+                "error": str(e),
+                "error_type": type(e).__name__,
+            },
+            ensure_ascii=False,
+        )
+
+
+X_SEARCH_SCHEMA = {
+    "name": "x_search",
+    "description": (
+        "Search X (Twitter) posts, profiles, and threads using xAI's built-in "
+        "X Search tool. Use this for current discussion, reactions, or claims "
+        "on X rather than general web pages. Available when xAI credentials "
+        "are configured (SuperGrok OAuth or XAI_API_KEY)."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "What to look up on X.",
+            },
+            "allowed_x_handles": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Optional list of X handles to include exclusively (max 10).",
+            },
+            "excluded_x_handles": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Optional list of X handles to exclude (max 10).",
+            },
+            "from_date": {
+                "type": "string",
+                "description": "Optional start date in YYYY-MM-DD format.",
+            },
+            "to_date": {
+                "type": "string",
+                "description": "Optional end date in YYYY-MM-DD format.",
+            },
+            "enable_image_understanding": {
+                "type": "boolean",
+                "description": "Whether xAI should analyze images attached to matching X posts.",
+                "default": False,
+            },
+            "enable_video_understanding": {
+                "type": "boolean",
+                "description": "Whether xAI should analyze videos attached to matching X posts.",
+                "default": False,
+            },
+        },
+        "required": ["query"],
+    },
+}
+
+
+def _handle_x_search(args, **kw):
+    return x_search_tool(
+        query=args.get("query", ""),
+        allowed_x_handles=args.get("allowed_x_handles"),
+        excluded_x_handles=args.get("excluded_x_handles"),
+        from_date=args.get("from_date", ""),
+        to_date=args.get("to_date", ""),
+        enable_image_understanding=bool(args.get("enable_image_understanding", False)),
+        enable_video_understanding=bool(args.get("enable_video_understanding", False)),
+    )
+
+
+registry.register(
+    name="x_search",
+    toolset="x_search",
+    schema=X_SEARCH_SCHEMA,
+    handler=_handle_x_search,
+    check_fn=check_x_search_requirements,
+    requires_env=["XAI_API_KEY"],
+    emoji="🐦",
+    max_result_size_chars=100_000,
+)
diff --git a/toolsets.py b/toolsets.py
index 8ec45f11a2f..5de07e4c7a1 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -88,6 +88,17 @@ TOOLSETS = {
         "tools": ["web_search"],
         "includes": []
     },
+
+    "x_search": {
+        "description": (
+            "Search X (Twitter) posts and threads via xAI's built-in "
+            "x_search Responses tool. Available when xAI credentials are "
+            "configured (SuperGrok OAuth or XAI_API_KEY). Off by default; "
+            "enable in `hermes tools` → X (Twitter) Search."
+        ),
+        "tools": ["x_search"],
+        "includes": []
+    },
     
     "vision": {
         "description": "Image analysis and vision tools",
diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md
index 67d31c929ad..d85aa4c64bf 100644
--- a/website/docs/guides/xai-grok-oauth.md
+++ b/website/docs/guides/xai-grok-oauth.md
@@ -128,7 +128,7 @@ hermes --provider x-ai-oauth       # alias
 hermes --provider xai-grok-oauth   # alias
 ```
 
-## Direct-to-xAI Tools (TTS / Image / Video / Transcription)
+## Direct-to-xAI Tools (TTS / Image / Video / Transcription / X Search)
 
 Once you're logged in via OAuth, every direct-to-xAI tool reuses the same bearer token automatically — there is **no separate setup** unless you'd rather use an API key.
 
@@ -139,6 +139,7 @@ hermes tools
 # → Text-to-Speech       → "xAI TTS"
 # → Image Generation     → "xAI Grok Imagine (image)"
 # → Video Generation     → "xAI Grok Imagine"
+# → X (Twitter) Search   → "xAI Grok OAuth (SuperGrok Subscription)"
 ```
 
 If OAuth tokens are already stored, the picker confirms it and skips the credential prompt. If neither OAuth nor `XAI_API_KEY` is set, the picker offers a 3-choice menu: OAuth login, paste API key, or skip.
@@ -147,6 +148,10 @@ If OAuth tokens are already stored, the picker confirms it and skips the credent
 The `video_gen` toolset is disabled by default. Enable it in `hermes tools` → `🎬 Video Generation` (press space) before the agent can call `video_generate`. Otherwise the agent may fall back to the bundled ComfyUI skill, which is also tagged for video generation.
 :::
 
+:::note X search is off by default
+The `x_search` toolset is disabled by default. Enable it in `hermes tools` → `🐦 X (Twitter) Search` (press space) before the agent can call `x_search`. The tool routes through xAI's built-in `x_search` Responses API — it works with **either** your SuperGrok OAuth login or a paid `XAI_API_KEY`, and prefers OAuth when both are configured (uses your subscription quota instead of API spend). The tool schema is hidden from the model when no xAI credentials are configured, regardless of whether the toolset is enabled.
+:::
+
 ### Models
 
 | Tool | Model | Notes |
diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md
index 03930264f8c..507bd307afb 100644
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@@ -196,6 +196,12 @@ Opt-in toolset (not loaded in the default `hermes-cli` set). Add via `--toolsets
 | `web_search` | Search the web for information. Returns up to 5 results by default with titles, URLs, and descriptions. Accepts an optional `limit` (1-100, default 5). The query is passed through to the configured backend, so operators such as `site:domain`, `filetype:pdf`, `intitle:word`, `-term`, and `"exact phrase"` may work when the backend supports them. | EXA_API_KEY or PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY |
 | `web_extract` | Extract content from web page URLs. Returns page content in markdown format. Also works with PDF URLs — pass the PDF link directly and it converts to markdown text. Pages under 5000 chars return full markdown; larger pages are LLM-summarized. | EXA_API_KEY or PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY |
 
+## `x_search` toolset
+
+| Tool | Description | Requires environment |
+|------|-------------|----------------------|
+| `x_search` | Search X (Twitter) posts, profiles, and threads using xAI's built-in `x_search` Responses tool. Use this for current discussion, reactions, or claims on X rather than general web pages. Off by default — opt in via `hermes tools` → 🐦 X (Twitter) Search. Schema is only registered when xAI credentials are configured (check_fn-gated). | XAI_API_KEY **or** xAI Grok OAuth (SuperGrok Subscription) login |
+
 ## `tts` toolset
 
 | Tool | Description | Requires environment |
diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md
index 5bf1f14260e..61b51e4e30e 100644
--- a/website/docs/reference/toolsets-reference.md
+++ b/website/docs/reference/toolsets-reference.md
@@ -82,6 +82,7 @@ Or in-session:
 | `vision` | `vision_analyze` | Image analysis via vision-capable models. |
 | `video` | `video_analyze` | Video analysis and understanding tools (opt-in, not in the default toolset — add explicitly via `--toolsets`). |
 | `web` | `web_extract`, `web_search` | Web search and page content extraction. |
+| `x_search` | `x_search` | Search X (Twitter) posts and threads via xAI's built-in `x_search` Responses tool. Off by default; opt in via `hermes tools`. Schema only registered when xAI credentials (SuperGrok OAuth or `XAI_API_KEY`) are configured. |
 | `yuanbao` | `yb_query_group_info`, `yb_query_group_members`, `yb_search_sticker`, `yb_send_dm`, `yb_send_sticker` | Yuanbao DM/group actions and sticker search. Registered only on `hermes-yuanbao`. |
 
 ## Platform Toolsets
diff --git a/website/docs/user-guide/features/tools.md b/website/docs/user-guide/features/tools.md
index 9f9eddbb513..0c5dd30cb2c 100644
--- a/website/docs/user-guide/features/tools.md
+++ b/website/docs/user-guide/features/tools.md
@@ -21,6 +21,7 @@ High-level categories:
 | Category | Examples | Description |
 |----------|----------|-------------|
 | **Web** | `web_search`, `web_extract` | Search the web and extract page content. |
+| **X Search** | `x_search` | Search X (Twitter) posts and threads via xAI's built-in `x_search` Responses tool — gated on xAI credentials (SuperGrok OAuth or `XAI_API_KEY`); off by default, opt in via `hermes tools` → 🐦 X (Twitter) Search. |
 | **Terminal & Files** | `terminal`, `process`, `read_file`, `patch` | Execute commands and manipulate files. |
 | **Browser** | `browser_navigate`, `browser_snapshot`, `browser_vision` | Interactive browser automation with text and vision support. |
 | **Media** | `vision_analyze`, `image_generate`, `text_to_speech` | Multimodal analysis and generation. |
diff --git a/website/docs/user-guide/features/x-search.md b/website/docs/user-guide/features/x-search.md
new file mode 100644
index 00000000000..c01bb8adf6d
--- /dev/null
+++ b/website/docs/user-guide/features/x-search.md
@@ -0,0 +1,117 @@
+---
+title: X (Twitter) Search
+description: Search X (Twitter) posts and threads from within the agent using xAI's built-in x_search Responses tool — works with either a SuperGrok OAuth login or an XAI_API_KEY.
+sidebar_label: X (Twitter) Search
+sidebar_position: 7
+---
+
+# X (Twitter) Search
+
+The `x_search` tool lets the agent search X (Twitter) posts, profiles, and threads directly. It's backed by xAI's built-in `x_search` tool on the Responses API at `https://api.x.ai/v1/responses` — Grok itself runs the search server-side and returns synthesized results with citations to the originating posts.
+
+**Use this instead of `web_search`** when you specifically want current discussion, reactions, or claims **on X**. For general web pages, keep using `web_search` / `web_extract`.
+
+## Authentication
+
+`x_search` registers when **either** xAI credential path is available:
+
+| Credential | Source | Setup |
+|------------|--------|-------|
+| **SuperGrok OAuth** (preferred) | Browser login at `accounts.x.ai`, refreshed automatically | `hermes auth add xai-oauth` — see [xAI Grok OAuth (SuperGrok Subscription)](../../guides/xai-grok-oauth.md) |
+| **`XAI_API_KEY`** | Paid xAI API key | Set in `~/.hermes/.env` |
+
+Both hit the same endpoint with the same payload — the only difference is the bearer token. **When both are configured, SuperGrok OAuth wins** so x_search runs against your subscription quota instead of paid API spend.
+
+The tool's `check_fn` runs the xAI credential resolver every time the model's tool list is rebuilt. A `True` return means the bearer is fetchable AND non-empty AND (if it had expired) successfully refreshed. Revoked tokens with a failed refresh hide the tool from the schema; the model simply can't see it.
+
+## Enabling the tool
+
+Off by default. Enable in `hermes tools`:
+
+```bash
+hermes tools
+# → 🐦 X (Twitter) Search   (press space to toggle on)
+```
+
+The picker offers two credential choices:
+
+1. **xAI Grok OAuth (SuperGrok Subscription)** — opens the browser to `accounts.x.ai` if you're not already logged in
+2. **xAI API key** — prompts for `XAI_API_KEY`
+
+Either choice satisfies the gating. You can pick whichever credentials you already have; the tool works identically with both. If both end up configured, OAuth is preferred at call time.
+
+## Configuration
+
+```yaml
+# ~/.hermes/config.yaml
+x_search:
+  # xAI model used for the Responses call.
+  # grok-4.20-reasoning is the recommended default; any Grok model
+  # with x_search tool access works.
+  model: grok-4.20-reasoning
+
+  # Request timeout in seconds. x_search can take 60–120s for
+  # complex queries — the default is generous. Minimum: 30.
+  timeout_seconds: 180
+
+  # Number of automatic retries on 5xx / ReadTimeout / ConnectionError.
+  # Each retry backs off (1.5x attempt seconds, capped at 5s).
+  retries: 2
+```
+
+## Tool parameters
+
+The agent calls `x_search` with these arguments:
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `query` | string (required) | What to look up on X. |
+| `allowed_x_handles` | string array | Optional list of handles to include **exclusively** (max 10). Leading `@` is stripped. |
+| `excluded_x_handles` | string array | Optional list of handles to exclude (max 10). Mutually exclusive with `allowed_x_handles`. |
+| `from_date` | string | Optional `YYYY-MM-DD` start date. |
+| `to_date` | string | Optional `YYYY-MM-DD` end date. |
+| `enable_image_understanding` | boolean | Ask xAI to analyze images attached to matching posts. |
+| `enable_video_understanding` | boolean | Ask xAI to analyze videos attached to matching posts. |
+
+The tool returns JSON with:
+
+- `answer` — synthesized text response from Grok
+- `citations` — citations returned by the Responses API top-level field
+- `inline_citations` — `url_citation` annotations extracted from the message body (each with `url`, `title`, `start_index`, `end_index`)
+- `credential_source` — `"xai-oauth"` if OAuth resolved, `"xai"` if API key resolved
+- `model`, `query`, `provider`, `tool`, `success`
+
+## Example
+
+Talking to the agent:
+
+> What are people on X saying about the new Grok image features? Focus on responses from @xai.
+
+The agent will:
+
+1. Call `x_search` with `query="reactions to new Grok image features"`, `allowed_x_handles=["xai"]`
+2. Get back a synthesized answer plus a list of citations linking to specific posts
+3. Reply with the answer and references
+
+## Troubleshooting
+
+### "No xAI credentials available"
+
+The tool surfaces this when both auth paths fail. Either set `XAI_API_KEY` in `~/.hermes/.env` or run `hermes auth add xai-oauth` and complete the browser login. Then restart your session so the agent re-reads the tool registry.
+
+### "`x_search` is not enabled for this model"
+
+The configured `x_search.model` doesn't have access to the server-side `x_search` tool. Switch to `grok-4.20-reasoning` (the default) or another Grok model that supports it. Check the [xAI documentation](https://docs.x.ai/) for the current list.
+
+### Tool doesn't appear in the schema
+
+Two possible causes:
+
+1. **Toolset not enabled.** Run `hermes tools` and confirm `🐦 X (Twitter) Search` is checked.
+2. **No xAI credentials.** The check_fn returns False, so the schema stays hidden. Run `hermes auth status` to confirm xai-oauth login state, and check that `XAI_API_KEY` is set (if you're using the API-key path).
+
+## See Also
+
+- [xAI Grok OAuth (SuperGrok Subscription)](../../guides/xai-grok-oauth.md) — the OAuth setup guide
+- [Web Search & Extract](web-search.md) — for general (non-X) web search
+- [Tools Reference](../../reference/tools-reference.md) — full tool catalog
diff --git a/website/sidebars.ts b/website/sidebars.ts
index 2f870a97696..f619f2318c9 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -83,6 +83,7 @@ const sidebars: SidebarsConfig = {
           items: [
             'user-guide/features/voice-mode',
             'user-guide/features/web-search',
+            'user-guide/features/x-search',
             'user-guide/features/browser',
             'user-guide/features/computer-use',
             'user-guide/features/vision',

From c445f48b78ad9dfb142d2337b51f227bde66cc84 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 01:00:27 -0700
Subject: [PATCH 117/218] fix(delegation): honor api_mode + auto-detect
 anthropic_messages URLs (#26824)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Subagent delegation hardcoded api_mode='chat_completions' for any
delegation.base_url that didn't match three specific hostnames
(chatgpt.com, api.anthropic.com, api.kimi.com/coding), and never
read delegation.api_mode from config. Azure AI Foundry's
https://foundry.services.ai.azure.com/anthropic endpoint fell through
and got chat_completions, causing 404s on every delegate_task call.

The main agent already handles this correctly via the shared
_detect_api_mode_for_url() helper (anything ending in /anthropic →
anthropic_messages); delegation reimplemented its own narrower check.

Reuse the shared detector and honor an explicit delegation.api_mode
when set so users can also force the transport on non-standard
endpoints the URL heuristic can't classify.

Fixes #10213.

Co-authored-by: HiddenPuppy <HiddenPuppy@users.noreply.github.com>
---
 hermes_cli/config.py         |  4 +++
 tests/tools/test_delegate.py | 57 ++++++++++++++++++++++++++++++++++++
 tools/delegate_tool.py       | 16 +++++++++-
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 5419ef92b4c..574f2397d91 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1146,6 +1146,10 @@ DEFAULT_CONFIG = {
         "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
         "base_url": "",    # direct OpenAI-compatible endpoint for subagents
         "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
+        "api_mode": "",    # wire protocol for delegation.base_url: "chat_completions",
+                           # "codex_responses", or "anthropic_messages". Empty = auto-detect
+                           # from URL (e.g. /anthropic suffix → anthropic_messages). Set this
+                           # explicitly for non-standard endpoints the heuristic can't detect.
         # When delegate_task narrows child toolsets explicitly, preserve any
         # MCP toolsets the parent already has enabled. On by default so
         # narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py
index 468fbdaf942..684f24f5da8 100644
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -890,6 +890,63 @@ class TestDelegationCredentialResolution(unittest.TestCase):
         self.assertEqual(creds["api_key"], "local-key")
         self.assertEqual(creds["api_mode"], "chat_completions")
 
+    def test_direct_endpoint_auto_detects_anthropic_messages_suffix(self):
+        # Issue #10213: Azure AI Foundry exposes Anthropic-compatible models at
+        # a /anthropic URL suffix. Subagents must pick anthropic_messages
+        # automatically, matching the main agent's runtime resolver.
+        parent = _make_mock_parent(depth=0)
+        cfg = {
+            "model": "claude-opus-4-6",
+            "provider": "custom",
+            "base_url": "https://myfoundry.services.ai.azure.com/anthropic",
+            "api_key": "foundry-key",
+        }
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertEqual(creds["provider"], "custom")
+        self.assertEqual(creds["base_url"], "https://myfoundry.services.ai.azure.com/anthropic")
+        self.assertEqual(creds["api_key"], "foundry-key")
+        self.assertEqual(creds["api_mode"], "anthropic_messages")
+
+    def test_direct_endpoint_honors_explicit_api_mode(self):
+        # When delegation.api_mode is set explicitly, it overrides URL-based
+        # detection so users can force a transport on non-standard endpoints.
+        parent = _make_mock_parent(depth=0)
+        cfg = {
+            "model": "claude-opus-4-6",
+            "provider": "custom",
+            "base_url": "https://proxy.example.com/v1",
+            "api_key": "proxy-key",
+            "api_mode": "anthropic_messages",
+        }
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertEqual(creds["api_mode"], "anthropic_messages")
+
+    def test_direct_endpoint_explicit_api_mode_overrides_url_detection(self):
+        # Explicit api_mode in config always wins over auto-detection.
+        parent = _make_mock_parent(depth=0)
+        cfg = {
+            "model": "claude-opus-4-6",
+            "provider": "custom",
+            "base_url": "https://myfoundry.services.ai.azure.com/anthropic",
+            "api_key": "foundry-key",
+            "api_mode": "chat_completions",
+        }
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertEqual(creds["api_mode"], "chat_completions")
+
+    def test_direct_endpoint_invalid_api_mode_falls_back_to_detection(self):
+        # An invalid api_mode string must not break detection; fall back to URL heuristic.
+        parent = _make_mock_parent(depth=0)
+        cfg = {
+            "model": "claude-opus-4-6",
+            "provider": "custom",
+            "base_url": "https://myfoundry.services.ai.azure.com/anthropic",
+            "api_key": "foundry-key",
+            "api_mode": "garbage",
+        }
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertEqual(creds["api_mode"], "anthropic_messages")
+
     def test_direct_endpoint_returns_none_api_key_when_not_configured(self):
         # When base_url is set without api_key, api_key should be None so
         # _build_child_agent inherits the parent's key (effective_api_key = override or parent).
diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index f3a037c4341..136ea63ac40 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -2362,6 +2362,7 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
     configured_provider = str(cfg.get("provider") or "").strip() or None
     configured_base_url = str(cfg.get("base_url") or "").strip() or None
     configured_api_key = str(cfg.get("api_key") or "").strip() or None
+    configured_api_mode = str(cfg.get("api_mode") or "").strip().lower() or None
 
     if configured_base_url:
         # When delegation.api_key is not set, return None so _build_child_agent
@@ -2372,9 +2373,17 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
         # callers to duplicate the key under delegation.api_key.
         api_key = configured_api_key  # None → inherited from parent in _build_child_agent
 
+        # Use the shared URL-based api_mode detector (same path the main agent's
+        # runtime resolver uses) so Anthropic-compatible direct endpoints with a
+        # /anthropic suffix — Azure AI Foundry, MiniMax, Zhipu GLM, LiteLLM
+        # proxies — pick the right transport automatically. Without this,
+        # subagents would default to chat_completions and hit 404s on endpoints
+        # that only speak the Anthropic Messages protocol. Fixes #10213.
+        from hermes_cli.runtime_provider import _detect_api_mode_for_url
+
         base_lower = configured_base_url.lower()
         provider = "custom"
-        api_mode = "chat_completions"
+        api_mode = _detect_api_mode_for_url(configured_base_url) or "chat_completions"
         if (
             base_url_hostname(configured_base_url) == "chatgpt.com"
             and "/backend-api/codex" in base_lower
@@ -2388,6 +2397,11 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
             provider = "custom"
             api_mode = "anthropic_messages"
 
+        # Explicit delegation.api_mode in config always wins. Lets users force
+        # a transport for non-standard endpoints the URL heuristic can't detect.
+        if configured_api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
+            api_mode = configured_api_mode
+
         return {
             "model": configured_model,
             "provider": provider,

From 395e9dd9e298df682bbf77848636e9f61f713171 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 01:04:28 -0700
Subject: [PATCH 118/218] feat: add supports_parallel_tool_calls for MCP
 servers (#26825)

Port from openai/codex#17667: MCP servers can now opt-in to parallel
tool execution by setting supports_parallel_tool_calls: true in their
config. This allows tools from the same server to run concurrently
within a single tool-call batch, matching the behavior already available
for built-in tools like web_search and read_file.

Previously all MCP tools were forced sequential because they weren't in
the _PARALLEL_SAFE_TOOLS set. Now _should_parallelize_tool_batch checks
is_mcp_tool_parallel_safe() which looks up the server's config flag.

Config example:
  mcp_servers:
    docs:
      command: "docs-server"
      supports_parallel_tool_calls: true

Changes:
- tools/mcp_tool.py: Track parallel-safe servers in _parallel_safe_servers
  set, populated during register_mcp_servers(). Add is_mcp_tool_parallel_safe()
  public API.
- run_agent.py: Add _is_mcp_tool_parallel_safe() lazy-import wrapper. Update
  _should_parallelize_tool_batch() to check MCP tools against server config.
- 11 new tests covering the feature end-to-end.
- Updated MCP docs and config reference.
---
 run_agent.py                                  |  17 ++-
 tests/run_agent/test_run_agent.py             |  54 +++++++
 tests/tools/test_mcp_tool.py                  | 132 ++++++++++++++++++
 tools/mcp_tool.py                             |  39 +++++-
 .../docs/reference/mcp-config-reference.md    |   2 +
 website/docs/user-guide/features/mcp.md       |  18 +++
 6 files changed, 260 insertions(+), 2 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 88d5c95fcd8..310777076cb 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -393,6 +393,19 @@ def _is_destructive_command(cmd: str) -> bool:
     return False
 
 
+def _is_mcp_tool_parallel_safe(tool_name: str) -> bool:
+    """Check if an MCP tool comes from a server with parallel tool calls enabled.
+
+    Lazy-imports from ``tools.mcp_tool`` to avoid circular dependencies.
+    Returns False if the MCP module is not available.
+    """
+    try:
+        from tools.mcp_tool import is_mcp_tool_parallel_safe
+        return is_mcp_tool_parallel_safe(tool_name)
+    except Exception:
+        return False
+
+
 def _should_parallelize_tool_batch(tool_calls) -> bool:
     """Return True when a tool-call batch is safe to run concurrently."""
     if len(tool_calls) <= 1:
@@ -432,7 +445,9 @@ def _should_parallelize_tool_batch(tool_calls) -> bool:
             continue
 
         if tool_name not in _PARALLEL_SAFE_TOOLS:
-            return False
+            # Check if it's an MCP tool from a server that opted into parallel calls.
+            if not _is_mcp_tool_parallel_safe(tool_name):
+                return False
 
     return True
 
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index c493f91509a..cd62cd41ded 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -2269,6 +2269,60 @@ class TestParallelScopePathNormalization:
         assert not _should_parallelize_tool_batch([tc1, tc2])
 
 
+class TestMcpParallelToolBatch:
+    """Integration test: _should_parallelize_tool_batch respects MCP parallel flag."""
+
+    def test_mcp_tools_default_sequential(self):
+        """MCP tools without supports_parallel_tool_calls are sequential."""
+        from run_agent import _should_parallelize_tool_batch
+        tc1 = _mock_tool_call(name="mcp_github_list_repos", arguments='{"org":"openai"}', call_id="c1")
+        tc2 = _mock_tool_call(name="mcp_github_search_code", arguments='{"q":"test"}', call_id="c2")
+        assert not _should_parallelize_tool_batch([tc1, tc2])
+
+    def test_mcp_tools_parallel_when_server_opted_in(self):
+        """MCP tools from a parallel-safe server can run concurrently."""
+        from run_agent import _should_parallelize_tool_batch
+        from tools.mcp_tool import _parallel_safe_servers, _lock
+        with _lock:
+            _parallel_safe_servers.add("github")
+        try:
+            tc1 = _mock_tool_call(name="mcp_github_list_repos", arguments='{"org":"openai"}', call_id="c1")
+            tc2 = _mock_tool_call(name="mcp_github_search_code", arguments='{"q":"test"}', call_id="c2")
+            assert _should_parallelize_tool_batch([tc1, tc2])
+        finally:
+            with _lock:
+                _parallel_safe_servers.discard("github")
+
+    def test_mixed_mcp_and_builtin_parallel(self):
+        """MCP parallel tools mixed with built-in parallel-safe tools."""
+        from run_agent import _should_parallelize_tool_batch
+        from tools.mcp_tool import _parallel_safe_servers, _lock
+        with _lock:
+            _parallel_safe_servers.add("docs")
+        try:
+            tc1 = _mock_tool_call(name="mcp_docs_search", arguments='{"query":"api"}', call_id="c1")
+            tc2 = _mock_tool_call(name="web_search", arguments='{"query":"test"}', call_id="c2")
+            assert _should_parallelize_tool_batch([tc1, tc2])
+        finally:
+            with _lock:
+                _parallel_safe_servers.discard("docs")
+
+    def test_mixed_parallel_and_serial_mcp_servers(self):
+        """One parallel MCP server + one non-parallel MCP server = sequential."""
+        from run_agent import _should_parallelize_tool_batch
+        from tools.mcp_tool import _parallel_safe_servers, _lock
+        with _lock:
+            _parallel_safe_servers.add("docs")
+            # "github" is NOT in _parallel_safe_servers
+        try:
+            tc1 = _mock_tool_call(name="mcp_docs_search", arguments='{"query":"api"}', call_id="c1")
+            tc2 = _mock_tool_call(name="mcp_github_list_repos", arguments='{"org":"openai"}', call_id="c2")
+            assert not _should_parallelize_tool_batch([tc1, tc2])
+        finally:
+            with _lock:
+                _parallel_safe_servers.discard("docs")
+
+
 class TestHandleMaxIterations:
     def test_returns_summary(self, agent):
         resp = _mock_response(content="Here is a summary of what I did.")
diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py
index 7f6c3f6704c..0a094eb5467 100644
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@@ -3762,3 +3762,135 @@ class TestRegisterMcpServers:
                 )
 
         _servers.pop("srv", None)
+
+
+# ---------------------------------------------------------------------------
+# Tests for parallel tool call support (port from openai/codex#17667)
+# ---------------------------------------------------------------------------
+
+class TestMcpParallelToolCalls:
+    """Tests for the supports_parallel_tool_calls config option."""
+
+    def test_is_mcp_tool_parallel_safe_non_mcp_tool(self):
+        """Non-MCP tool names always return False."""
+        from tools.mcp_tool import is_mcp_tool_parallel_safe
+        assert is_mcp_tool_parallel_safe("web_search") is False
+        assert is_mcp_tool_parallel_safe("read_file") is False
+        assert is_mcp_tool_parallel_safe("terminal") is False
+        assert is_mcp_tool_parallel_safe("") is False
+
+    def test_is_mcp_tool_parallel_safe_no_servers(self):
+        """MCP tool from unknown server returns False."""
+        from tools.mcp_tool import is_mcp_tool_parallel_safe, _parallel_safe_servers, _lock
+        with _lock:
+            _parallel_safe_servers.clear()
+        assert is_mcp_tool_parallel_safe("mcp_docs_search") is False
+
+    def test_is_mcp_tool_parallel_safe_with_flag(self):
+        """MCP tool from a parallel-safe server returns True."""
+        from tools.mcp_tool import is_mcp_tool_parallel_safe, _parallel_safe_servers, _lock
+        with _lock:
+            _parallel_safe_servers.add("docs")
+        try:
+            assert is_mcp_tool_parallel_safe("mcp_docs_search") is True
+            assert is_mcp_tool_parallel_safe("mcp_docs_read_file") is True
+            # Different server should be False
+            assert is_mcp_tool_parallel_safe("mcp_github_list_repos") is False
+        finally:
+            with _lock:
+                _parallel_safe_servers.discard("docs")
+
+    def test_is_mcp_tool_parallel_safe_server_with_underscores(self):
+        """Server names containing underscores are correctly matched."""
+        from tools.mcp_tool import is_mcp_tool_parallel_safe, _parallel_safe_servers, _lock
+        with _lock:
+            _parallel_safe_servers.add("my_server")
+        try:
+            assert is_mcp_tool_parallel_safe("mcp_my_server_query") is True
+        finally:
+            with _lock:
+                _parallel_safe_servers.discard("my_server")
+
+    def test_is_mcp_tool_parallel_safe_no_tool_suffix(self):
+        """Tool name that is just 'mcp_{server}' without a tool part returns False."""
+        from tools.mcp_tool import is_mcp_tool_parallel_safe, _parallel_safe_servers, _lock
+        with _lock:
+            _parallel_safe_servers.add("docs")
+        try:
+            # "mcp_docs" has no tool part after the server name
+            assert is_mcp_tool_parallel_safe("mcp_docs") is False
+            # "mcp_docs_" has empty tool part
+            assert is_mcp_tool_parallel_safe("mcp_docs_") is False
+        finally:
+            with _lock:
+                _parallel_safe_servers.discard("docs")
+
+    def test_register_mcp_servers_tracks_parallel_flag(self):
+        """register_mcp_servers populates _parallel_safe_servers from config."""
+        from tools.mcp_tool import (
+            register_mcp_servers, _parallel_safe_servers, _lock,
+            sanitize_mcp_name_component,
+        )
+        fake_config = {
+            "parallel_srv": {
+                "command": "echo",
+                "supports_parallel_tool_calls": True,
+            },
+            "serial_srv": {
+                "command": "echo",
+                "supports_parallel_tool_calls": False,
+            },
+            "default_srv": {
+                "command": "echo",
+                # no supports_parallel_tool_calls key
+            },
+        }
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._ensure_mcp_loop"), \
+             patch("tools.mcp_tool._run_on_mcp_loop"), \
+             patch("tools.mcp_tool._existing_tool_names", return_value=[]):
+            register_mcp_servers(fake_config)
+
+        with _lock:
+            assert sanitize_mcp_name_component("parallel_srv") in _parallel_safe_servers
+            assert sanitize_mcp_name_component("serial_srv") not in _parallel_safe_servers
+            assert sanitize_mcp_name_component("default_srv") not in _parallel_safe_servers
+            # Cleanup
+            _parallel_safe_servers.discard(sanitize_mcp_name_component("parallel_srv"))
+
+    def test_register_mcp_servers_removes_parallel_flag_on_toggle(self):
+        """Toggling supports_parallel_tool_calls to false removes server from the set."""
+        from tools.mcp_tool import (
+            register_mcp_servers, _parallel_safe_servers, _lock,
+            sanitize_mcp_name_component,
+        )
+
+        # First registration: parallel enabled
+        config_on = {
+            "toggle_srv": {
+                "command": "echo",
+                "supports_parallel_tool_calls": True,
+            },
+        }
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._ensure_mcp_loop"), \
+             patch("tools.mcp_tool._run_on_mcp_loop"), \
+             patch("tools.mcp_tool._existing_tool_names", return_value=[]):
+            register_mcp_servers(config_on)
+        with _lock:
+            assert sanitize_mcp_name_component("toggle_srv") in _parallel_safe_servers
+
+        # Second registration: parallel disabled
+        config_off = {
+            "toggle_srv": {
+                "command": "echo",
+                "supports_parallel_tool_calls": False,
+            },
+        }
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._ensure_mcp_loop"), \
+             patch("tools.mcp_tool._run_on_mcp_loop"), \
+             patch("tools.mcp_tool._existing_tool_names", return_value=[]):
+            register_mcp_servers(config_off)
+        with _lock:
+            assert sanitize_mcp_name_component("toggle_srv") not in _parallel_safe_servers
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index ba104cc4273..b24bb9705ad 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -24,6 +24,7 @@ Example config::
         args: ["-y", "@modelcontextprotocol/server-github"]
         env:
           GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
+        supports_parallel_tool_calls: true  # tools from this server may run concurrently
       remote_api:
         url: "https://my-mcp-server.example.com/mcp"
         headers:
@@ -56,6 +57,8 @@ Features:
     - Thread-safe architecture with dedicated background event loop
     - Sampling support: MCP servers can request LLM completions via
       sampling/createMessage (text and tool-use responses)
+    - Parallel tool call opt-in: per-server ``supports_parallel_tool_calls``
+      flag allows concurrent execution of tools from the same server
 
 Architecture:
     A dedicated background event loop (_mcp_loop) runs in a daemon thread.
@@ -1976,11 +1979,16 @@ def _handle_session_expired_and_retry(
     return None
 
 
+# Sanitized server names whose ``supports_parallel_tool_calls`` config is True.
+# Populated during ``register_mcp_servers()`` and queried by
+# ``is_mcp_tool_parallel_safe()`` for the parallel-execution check in run_agent.
+_parallel_safe_servers: set = set()
+
 # Dedicated event loop running in a background daemon thread.
 _mcp_loop: Optional[asyncio.AbstractEventLoop] = None
 _mcp_thread: Optional[threading.Thread] = None
 
-# Protects _mcp_loop, _mcp_thread, _servers, and _stdio_pids.
+# Protects _mcp_loop, _mcp_thread, _servers, _parallel_safe_servers, and _stdio_pids.
 _lock = threading.Lock()
 
 # PIDs of stdio MCP server subprocesses.  Tracked so we can force-kill
@@ -3098,6 +3106,12 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
             for k, v in servers.items()
             if k not in _servers and _parse_boolish(v.get("enabled", True), default=True)
         }
+        # Track which servers opt-in to parallel tool calls (idempotent).
+        for srv_name, srv_cfg in servers.items():
+            if _parse_boolish(srv_cfg.get("supports_parallel_tool_calls", False), default=False):
+                _parallel_safe_servers.add(sanitize_mcp_name_component(srv_name))
+            else:
+                _parallel_safe_servers.discard(sanitize_mcp_name_component(srv_name))
 
     if not new_servers:
         return _existing_tool_names()
@@ -3208,6 +3222,29 @@ def discover_mcp_tools() -> List[str]:
     return tool_names
 
 
+def is_mcp_tool_parallel_safe(tool_name: str) -> bool:
+    """Check if an MCP tool belongs to a server that supports parallel tool calls.
+
+    MCP tool names follow the pattern ``mcp_{server}_{tool}``.  This extracts
+    the server component and checks it against the set of servers whose config
+    includes ``supports_parallel_tool_calls: true``.
+
+    Returns False for non-MCP tools or tools from servers without the flag.
+    """
+    if not tool_name.startswith("mcp_"):
+        return False
+    # Strip the "mcp_" prefix and extract the server name.
+    # Tool names are: mcp_{sanitized_server}_{sanitized_tool}
+    # We need to check all possible server prefixes because the server name
+    # itself may contain underscores after sanitization.
+    rest = tool_name[4:]  # strip "mcp_"
+    with _lock:
+        for server_name in _parallel_safe_servers:
+            if rest.startswith(server_name + "_") and len(rest) > len(server_name) + 1:
+                return True
+    return False
+
+
 def get_mcp_status() -> List[dict]:
     """Return status of all configured MCP servers for banner display.
 
diff --git a/website/docs/reference/mcp-config-reference.md b/website/docs/reference/mcp-config-reference.md
index a87478f91fa..ecd6ad2c1a4 100644
--- a/website/docs/reference/mcp-config-reference.md
+++ b/website/docs/reference/mcp-config-reference.md
@@ -28,6 +28,7 @@ mcp_servers:
     enabled: true
     timeout: 120
     connect_timeout: 60
+    supports_parallel_tool_calls: false
     tools:
       include: []
       exclude: []
@@ -47,6 +48,7 @@ mcp_servers:
 | `enabled` | bool | both | Skip the server entirely when false |
 | `timeout` | number | both | Tool call timeout |
 | `connect_timeout` | number | both | Initial connection timeout |
+| `supports_parallel_tool_calls` | bool | both | Allow tools from this server to run concurrently |
 | `tools` | mapping | both | Filtering and utility-tool policy |
 | `auth` | string | HTTP | Authentication method. Set to `oauth` to enable OAuth 2.1 with PKCE |
 | `sampling` | mapping | both | Server-initiated LLM request policy (see MCP guide) |
diff --git a/website/docs/user-guide/features/mcp.md b/website/docs/user-guide/features/mcp.md
index b136af15c66..c1711a9f3ae 100644
--- a/website/docs/user-guide/features/mcp.md
+++ b/website/docs/user-guide/features/mcp.md
@@ -105,6 +105,7 @@ Hermes reads MCP config from `~/.hermes/config.yaml` under `mcp_servers`.
 | `timeout` | number | Tool call timeout |
 | `connect_timeout` | number | Initial connection timeout |
 | `enabled` | bool | If `false`, Hermes skips the server entirely |
+| `supports_parallel_tool_calls` | bool | If `true`, tools from this server may run concurrently |
 | `tools` | mapping | Per-server tool filtering and utility policy |
 
 ### Minimal stdio example
@@ -409,6 +410,23 @@ Because Hermes now only registers those wrappers when both are true:
 
 This is intentional and keeps the tool list honest.
 
+## Parallel Tool Calls
+
+By default, MCP tools run sequentially — one at a time. If your MCP server exposes tools that are safe to run concurrently (e.g. read-only queries, independent API calls), you can opt-in to parallel execution:
+
+```yaml
+mcp_servers:
+  docs:
+    command: "docs-server"
+    supports_parallel_tool_calls: true
+```
+
+When `supports_parallel_tool_calls` is `true`, Hermes may execute multiple tools from that server at the same time within a single tool-call batch, just like it does for built-in read-only tools (web_search, read_file, etc.).
+
+:::caution
+Only enable parallel calls for MCP servers whose tools are safe to run at the same time. If tools read and write shared state, files, databases, or external resources, review the read/write race conditions before enabling this setting.
+:::
+
 ## MCP Sampling Support
 
 MCP servers can request LLM inference from Hermes via the `sampling/createMessage` protocol. This allows an MCP server to ask Hermes to generate text on its behalf — useful for servers that need LLM capabilities but don't have their own model access.

From 6ba35ec336cfcf5e36f398750e630783f8715bac Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 01:24:25 -0700
Subject: [PATCH 119/218] Inspired by Claude Code: tighten dangerous-command
 detection (#26829)

Port three hardening patches from Claude Code 2.1.113's expanded deny
rules to hermes' detect_dangerous_command() pattern list.

1. macOS /private/{etc,var,tmp,home} system paths
   /etc, /var, /tmp, /home are symlinks to /private/<name> on macOS.
   A write to /private/etc/sudoers works identically to /etc/sudoers
   but bypassed the plain /etc/ pattern check. Extracted a shared
   _SYSTEM_CONFIG_PATH fragment so /etc/ and the /private/ mirror
   stay in sync across redirect / tee / cp / mv / install / sed -i
   patterns.

2. killall -9 / -KILL / -SIGKILL / -s KILL / -r <regex>
   Parallel to the existing pkill -9 pattern. killall -9 against
   non-hermes processes was previously unprotected, and killall -r
   can sweep unrelated processes matching a regex.

3. find -execdir rm
   Same destructive effect as find -exec rm but ran in each match's
   directory. The previous pattern required a literal '-exec ' so
   -execdir slipped through.

Guarded by 32 new test cases in 4 test classes:
  - TestMacOSPrivateSystemPaths  (11 cases)
  - TestKillallKillSignals       (9 cases)
  - TestFindExecdir              (4 cases)
  - TestEtcPatternsUnaffectedByRefactor  (6 regression guards on
    the existing /etc/ coverage after the _SYSTEM_CONFIG_PATH refactor)

Inspiration: https://github.com/anthropics/claude-code/releases
(Claude Code 2.1.113, April 17 2026 - "Enhanced deny rules" and
"Dangerous path protection")
---
 tests/tools/test_approval.py | 203 +++++++++++++++++++++++++++++++++++
 tools/approval.py            |  37 +++++--
 2 files changed, 233 insertions(+), 7 deletions(-)

diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py
index 7ec2d5868f1..0694dbcdc91 100644
--- a/tests/tools/test_approval.py
+++ b/tests/tools/test_approval.py
@@ -1102,3 +1102,206 @@ class TestDetectSudoStdin:
             "make 2>&1 | tee build.log"
         )
         assert is_dangerous is False
+
+
+class TestMacOSPrivateSystemPaths:
+    """Inspired by Claude Code 2.1.113 "dangerous path protection".
+
+    On macOS, /etc, /var, /tmp, /home are symlinks to
+    /private/{etc,var,tmp,home}. A command that writes to
+    /private/etc/sudoers works identically to /etc/sudoers but bypasses
+    a plain "/etc/" pattern check.  These tests guard the shared
+    _SYSTEM_CONFIG_PATH fragment used across redirect / tee / cp / mv /
+    install / sed -i patterns.
+    """
+
+    def test_private_etc_redirect(self):
+        dangerous, _, desc = detect_dangerous_command(
+            "echo 'root ALL=NOPASSWD: ALL' > /private/etc/sudoers"
+        )
+        assert dangerous is True
+        assert "system config" in desc.lower()
+
+    def test_private_var_redirect(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "echo payload > /private/var/db/dslocal/nodes/x"
+        )
+        assert dangerous is True
+
+    def test_private_etc_via_tee(self):
+        dangerous, _, desc = detect_dangerous_command(
+            "echo malicious | tee /private/etc/hosts"
+        )
+        assert dangerous is True
+        assert "tee" in desc.lower() or "system" in desc.lower()
+
+    def test_private_etc_cp(self):
+        dangerous, _, desc = detect_dangerous_command(
+            "cp malicious.conf /private/etc/hosts"
+        )
+        assert dangerous is True
+        assert "copy" in desc.lower() or "system config" in desc.lower()
+
+    def test_private_etc_mv(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "mv evil /private/etc/ssh/sshd_config"
+        )
+        assert dangerous is True
+
+    def test_private_etc_install(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "install -m 600 key /private/etc/ssh/keys"
+        )
+        assert dangerous is True
+
+    def test_private_etc_sed_in_place(self):
+        dangerous, _, desc = detect_dangerous_command(
+            "sed -i 's/root/pwned/' /private/etc/passwd"
+        )
+        assert dangerous is True
+        assert "in-place" in desc.lower() or "system config" in desc.lower()
+
+    def test_private_var_sed_long_flag(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "sed --in-place 's/x/y/' /private/var/log/wtmp"
+        )
+        assert dangerous is True
+
+    def test_private_tmp_cp(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "cp rootkit /private/tmp/payload"
+        )
+        assert dangerous is True
+
+    def test_ls_private_is_safe(self):
+        """Reading under /private/ must not trigger approval."""
+        dangerous, _, _ = detect_dangerous_command("ls /private")
+        assert dangerous is False
+
+    def test_echo_mentioning_private_path_is_safe(self):
+        """Literal mention of /private/etc in an echo string must not fire."""
+        dangerous, _, _ = detect_dangerous_command(
+            "echo 'the macOS path is /private/etc on disk'"
+        )
+        assert dangerous is False
+
+
+class TestKillallKillSignals:
+    """Inspired by Claude Code 2.1.113 expanded deny rules.
+
+    The existing pattern caught `pkill -9` but not the equivalent
+    `killall -9` / `-KILL` / `-s KILL` / `-r <regex>` broad sweeps that
+    can wipe out unrelated processes.
+    """
+
+    def test_killall_dash_9(self):
+        dangerous, _, desc = detect_dangerous_command("killall -9 firefox")
+        assert dangerous is True
+        assert "kill" in desc.lower()
+
+    def test_killall_dash_kill(self):
+        dangerous, _, _ = detect_dangerous_command("killall -KILL firefox")
+        assert dangerous is True
+
+    def test_killall_dash_sigkill(self):
+        dangerous, _, _ = detect_dangerous_command("killall -SIGKILL firefox")
+        assert dangerous is True
+
+    def test_killall_dash_s_kill(self):
+        dangerous, _, _ = detect_dangerous_command("killall -s KILL firefox")
+        assert dangerous is True
+
+    def test_killall_dash_s_signum(self):
+        dangerous, _, _ = detect_dangerous_command("killall -s 9 firefox")
+        assert dangerous is True
+
+    def test_killall_regex(self):
+        """killall -r <regex> is a broad sweep; require approval."""
+        dangerous, _, desc = detect_dangerous_command("killall -r 'fire.*'")
+        assert dangerous is True
+        assert "regex" in desc.lower() or "kill" in desc.lower()
+
+    def test_killall_combined_flags(self):
+        dangerous, _, _ = detect_dangerous_command("killall -9 -r 'herm.*'")
+        assert dangerous is True
+
+    def test_killall_list_signals_is_safe(self):
+        """`killall -l` lists signals and is harmless — must not fire."""
+        dangerous, _, _ = detect_dangerous_command("killall -l")
+        assert dangerous is False
+
+    def test_killall_version_is_safe(self):
+        dangerous, _, _ = detect_dangerous_command("killall -V")
+        assert dangerous is False
+
+
+class TestFindExecdir:
+    """Inspired by Claude Code 2.1.113 tightening of find rules.
+
+    `find -execdir rm` has the same destructive effect as `find -exec rm`
+    but ran in each match's directory. Previously missed because the
+    pattern required a literal `-exec ` followed by a space.
+    """
+
+    def test_find_execdir_rm(self):
+        dangerous, _, desc = detect_dangerous_command(
+            "find . -execdir rm {} \\;"
+        )
+        assert dangerous is True
+        assert "find" in desc.lower() or "rm" in desc.lower()
+
+    def test_find_execdir_with_absolute_rm(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "find /var -execdir /bin/rm -rf {} \\;"
+        )
+        assert dangerous is True
+
+    def test_find_exec_rm_still_caught(self):
+        """Original -exec pattern must still fire (regression guard)."""
+        dangerous, _, _ = detect_dangerous_command(
+            "find . -exec rm {} \\;"
+        )
+        assert dangerous is True
+
+    def test_find_execdir_ls_is_safe(self):
+        """-execdir with a read-only command is not dangerous."""
+        dangerous, _, _ = detect_dangerous_command(
+            "find . -execdir ls {} \\;"
+        )
+        assert dangerous is False
+
+
+class TestEtcPatternsUnaffectedByRefactor:
+    """Regression guard: the /etc/ patterns were refactored to share the
+    _SYSTEM_CONFIG_PATH fragment with the /private/ mirror. Make sure the
+    existing /etc/ coverage remains identical.
+    """
+
+    def test_etc_redirect(self):
+        dangerous, _, _ = detect_dangerous_command("echo x > /etc/hosts")
+        assert dangerous is True
+
+    def test_etc_cp(self):
+        dangerous, _, _ = detect_dangerous_command("cp evil /etc/hosts")
+        assert dangerous is True
+
+    def test_etc_sed_inline(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "sed -i 's/a/b/' /etc/hosts"
+        )
+        assert dangerous is True
+
+    def test_etc_tee(self):
+        dangerous, _, _ = detect_dangerous_command(
+            "echo x | tee /etc/hosts"
+        )
+        assert dangerous is True
+
+    def test_cat_etc_hostname_is_safe(self):
+        """Reading /etc/ files is safe — only writes require approval."""
+        dangerous, _, _ = detect_dangerous_command("cat /etc/hostname")
+        assert dangerous is False
+
+    def test_grep_etc_passwd_is_safe(self):
+        dangerous, _, _ = detect_dangerous_command("grep root /etc/passwd")
+        assert dangerous is False
diff --git a/tools/approval.py b/tools/approval.py
index 84d02cc6a98..cf5df644ff8 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -133,8 +133,19 @@ _CREDENTIAL_FILES = (
     r'(?:~|\$home|\$\{home\})/\.'
     r'(?:netrc|pgpass|npmrc|pypirc)\b'
 )
+# macOS: /etc, /var, /tmp, /home are symlinks to /private/{etc,var,tmp,home}.
+# A command written to target /private/etc/sudoers works identically to
+# /etc/sudoers on macOS but bypasses a plain "/etc/" pattern check. Match
+# both forms. Inspired by Claude Code 2.1.113's "dangerous path protection".
+_MACOS_PRIVATE_SYSTEM_PATH = r'/private/(?:etc|var|tmp|home)/'
+# System-config paths that should trigger approval for any write/edit,
+# collapsing /etc, its macOS /private/etc mirror, and /etc/sudoers.d/ into
+# one shared fragment so new DANGEROUS_PATTERNS stay consistent.
+_SYSTEM_CONFIG_PATH = (
+    rf'(?:/etc/|{_MACOS_PRIVATE_SYSTEM_PATH})'
+)
 _SENSITIVE_WRITE_TARGET = (
-    r'(?:/etc/|/dev/sd|'
+    rf'(?:{_SYSTEM_CONFIG_PATH}|/dev/sd|'
     rf'{_SSH_SENSITIVE_PATH}|'
     rf'{_HERMES_ENV_PATH}|'
     rf'{_SHELL_RC_FILES}|'
@@ -318,10 +329,17 @@ DANGEROUS_PATTERNS = [
     # *next* line to satisfy the negative lookahead, silently allowing DELETE without WHERE.
     (r'\bDELETE\s+FROM\b(?![^\n]*\bWHERE\b)', "SQL DELETE without WHERE"),
     (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
-    (r'>\s*/etc/', "overwrite system config"),
+    (rf'>\s*{_SYSTEM_CONFIG_PATH}', "overwrite system config"),
     (r'\bsystemctl\s+(-[^\s]+\s+)*(stop|restart|disable|mask)\b', "stop/restart system service"),
     (r'\bkill\s+-9\s+-1\b', "kill all processes"),
     (r'\bpkill\s+-9\b', "force kill processes"),
+    # killall with SIGKILL (parallel to pkill -9). Catches -9 / -KILL /
+    # -s KILL / -SIGKILL forms, and also `killall -r <regex>` broad sweeps
+    # that can wipe out unrelated processes by accident.
+    # Inspired by Claude Code 2.1.113 expanded deny rules.
+    (r'\bkillall\s+(-[^\s]*\s+)*-(9|KILL|SIGKILL)\b', "force kill processes (killall -KILL)"),
+    (r'\bkillall\s+(-[^\s]*\s+)*-s\s+(KILL|SIGKILL|9)\b', "force kill processes (killall -s KILL)"),
+    (r'\bkillall\s+(-[^\s]*\s+)*-r\b', "kill processes by regex (killall -r)"),
     (r':\(\)\s*\{\s*:\s*\|\s*:\s*&\s*\}\s*;\s*:', "fork bomb"),
     # Any shell invocation via -c or combined flags like -lc, -ic, etc.
     (r'\b(bash|sh|zsh|ksh)\s+-[^\s]*c(\s+|$)', "shell command via -c/-lc flag"),
@@ -333,7 +351,11 @@ DANGEROUS_PATTERNS = [
     (rf'\btee\b.*["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config via tee"),
     (rf'>>?\s*["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config via redirection"),
     (r'\bxargs\s+.*\brm\b', "xargs with rm"),
-    (r'\bfind\b.*-exec\s+(/\S*/)?rm\b', "find -exec rm"),
+    # find -exec rm / -execdir rm — the -execdir variant (same semantics,
+    # runs in the directory of each match) was previously missed. Claude
+    # Code 2.1.113 tightened their equivalent find rule to stop auto-
+    # approving -exec / -delete flags.
+    (r'\bfind\b.*-exec(?:dir)?\s+(/\S*/)?rm\b', "find -exec/-execdir rm"),
     (r'\bfind\b.*-delete\b', "find -delete"),
     # Gateway lifecycle protection: prevent the agent from killing its own
     # gateway process.  These commands trigger a gateway restart/stop that
@@ -351,11 +373,12 @@ DANGEROUS_PATTERNS = [
     # to regex at detection time. Catch the structural pattern instead.
     (r'\bkill\b.*\$\(\s*pgrep\b', "kill process via pgrep expansion (self-termination)"),
     (r'\bkill\b.*`\s*pgrep\b', "kill process via backtick pgrep expansion (self-termination)"),
-    # File copy/move/edit into sensitive system paths
-    (r'\b(cp|mv|install)\b.*\s/etc/', "copy/move file into /etc/"),
+    # File copy/move/edit into sensitive system paths (/etc/ and macOS
+    # /private/etc/ mirror).
+    (rf'\b(cp|mv|install)\b.*\s{_SYSTEM_CONFIG_PATH}', "copy/move file into system config path"),
     (rf'\b(cp|mv|install)\b.*\s["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config file"),
-    (r'\bsed\s+-[^\s]*i.*\s/etc/', "in-place edit of system config"),
-    (r'\bsed\s+--in-place\b.*\s/etc/', "in-place edit of system config (long flag)"),
+    (rf'\bsed\s+-[^\s]*i.*\s{_SYSTEM_CONFIG_PATH}', "in-place edit of system config"),
+    (rf'\bsed\s+--in-place\b.*\s{_SYSTEM_CONFIG_PATH}', "in-place edit of system config (long flag)"),
     # Script execution via heredoc — bypasses the -e/-c flag patterns above.
     # `python3 << 'EOF'` feeds arbitrary code via stdin without -c/-e flags.
     (r'\b(python[23]?|perl|ruby|node)\s+<<', "script execution via heredoc"),

From d725407c5645c84607df552da5175e9a628b9bf9 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 01:25:25 -0700
Subject: [PATCH 120/218] security(deps): bump aiohttp, anthropic, cryptography
 to CVE-fixed versions (#26830)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10695. Picks up the still-vulnerable Python pins on current main:

- aiohttp 3.13.3 -> 3.13.4 (messaging, slack, homeassistant, sms extras +
  lazy_deps platform.slack) — CVE-2026-34513 (DNS cache exhaustion),
  CVE-2026-34518 (cookie/proxy-auth leak on cross-origin redirect, relevant
  for the gateway since it handles OAuth tokens), CVE-2026-34519 (response
  reason injection), CVE-2026-34520 (null bytes in headers), CVE-2026-34525
  (multiple Host headers).
- anthropic 0.86.0 -> 0.87.0 (anthropic extra + lazy_deps provider.anthropic)
  — CVE-2026-34450 (memory tool files created mode 0o666),
  CVE-2026-34452 (path-traversal in async local-filesystem memory tool).
  Not directly exploitable since hermes-agent doesn't use the SDK's
  filesystem memory tool, but the SDK is bumped for hygiene.
- cryptography pinned explicitly at 46.0.7 in core dependencies —
  CVE-2026-39892 (buffer overflow on non-contiguous buffers). Previously
  came in transitively via PyJWT[crypto]; the explicit floor keeps the
  WeCom/Weixin crypto paths from drifting below the fix.

curl-cffi from the original issue is no longer in pyproject.toml or uv.lock,
so no action needed there.

uv.lock regenerated cleanly; only aiohttp / anthropic / cryptography moved.

Credit: original issue + scoping by @shaun0927 (#10695, #10701).
Floor analysis and packaging-surface audit by @gnanirahulnutakki (#10784),
adapted to current main's exact-pin style.

Co-authored-by: shaun0927 <shaun0927@users.noreply.github.com>
Co-authored-by: Gnani Rahul Nutakki <gnanirahulnutakki@users.noreply.github.com>
---
 pyproject.toml     |  14 ++-
 tools/lazy_deps.py |   4 +-
 uv.lock            | 292 +++++++++++++++++++++++----------------------
 3 files changed, 158 insertions(+), 152 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index fff11f6a5d9..c1591a6a9e0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,10 @@ dependencies = [
   "croniter==6.0.0",
   # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
   "PyJWT[crypto]==2.12.1",  # CVE-2026-32597
+  # Directly imported by WeCom/Weixin crypto paths and pulled transitively by
+  # PyJWT[crypto]; pin explicitly so the floor doesn't drift below the
+  # CVE-2026-39892 fix (buffer overflow on non-contiguous buffers).
+  "cryptography==46.0.7",  # CVE-2026-39892
   # Windows has no IANA tzdata shipped with the OS, so Python's ``zoneinfo``
   # (PEP 615) raises ``ZoneInfoNotFoundError`` for every non-UTC timezone
   # out of the box.  ``tzdata`` ships the Olson database as a data package
@@ -65,7 +69,7 @@ dependencies = [
 [project.optional-dependencies]
 # Native Anthropic provider — only needed when provider=anthropic (not via
 # OpenRouter or other aggregators).
-anthropic = ["anthropic==0.86.0"]
+anthropic = ["anthropic==0.87.0"]  # CVE-2026-34450, CVE-2026-34452
 # Web search backends — each only loaded when the user picks it as their
 # search provider (configured via `hermes tools` or config.yaml).
 exa = ["exa-py==2.10.2"]
@@ -81,9 +85,9 @@ daytona = ["daytona==0.155.0"]
 vercel = ["vercel==0.5.7"]
 hindsight = ["hindsight-client==0.6.1"]
 dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
-messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
+messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.4", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]  # aiohttp: CVE-2026-34513/34518/34519/34520/34525
 cron = []  # croniter is now a core dependency; this extra kept for back-compat
-slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
+slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.4"]
 matrix = ["mautrix[encryption]==0.21.0", "Markdown==3.10.2", "aiosqlite==0.22.1", "asyncpg==0.31.0", "aiohttp-socks==0.11.0"]
 cli = ["simple-term-menu==1.6.6"]
 tts-premium = ["elevenlabs==1.59.0"]
@@ -100,8 +104,8 @@ pty = [
 ]
 honcho = ["honcho-ai==2.0.1"]
 mcp = ["mcp==1.26.0"]
-homeassistant = ["aiohttp==3.13.3"]
-sms = ["aiohttp==3.13.3"]
+homeassistant = ["aiohttp==3.13.4"]
+sms = ["aiohttp==3.13.4"]
 # Computer use — macOS background desktop control via cua-driver (MCP stdio).
 # The cua-driver binary itself is installed via `hermes tools` post-setup
 # (curl install script); this extra just pins the MCP client used to talk
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
index 258a09ef667..faaf7ec42bf 100644
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@@ -78,7 +78,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
     # ─── Inference providers ───────────────────────────────────────────────
     # Native Anthropic SDK — needed when provider=anthropic (not via
     # OpenRouter / aggregators which use the openai SDK).
-    "provider.anthropic": ("anthropic==0.86.0",),
+    "provider.anthropic": ("anthropic==0.87.0",),  # CVE-2026-34450, CVE-2026-34452
     # AWS Bedrock provider
     "provider.bedrock": ("boto3==1.42.89",),
 
@@ -125,7 +125,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
     "platform.slack": (
         "slack-bolt==1.27.0",
         "slack-sdk==3.40.1",
-        "aiohttp==3.13.3",
+        "aiohttp==3.13.4",  # CVE-2026-34513/34518/34519/34520/34525
     ),
     "platform.matrix": (
         "mautrix[encryption]==0.21.0",
diff --git a/uv.lock b/uv.lock
index 2508637a081..eca62880304 100644
--- a/uv.lock
+++ b/uv.lock
@@ -40,7 +40,7 @@ wheels = [
 
 [[package]]
 name = "aiohttp"
-version = "3.13.3"
+version = "3.13.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiohappyeyeballs" },
@@ -51,93 +51,93 @@ dependencies = [
     { name = "propcache" },
     { name = "yarl" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/45/4a/064321452809dae953c1ed6e017504e72551a26b6f5708a5a80e4bf556ff/aiohttp-3.13.4.tar.gz", hash = "sha256:d97a6d09c66087890c2ab5d49069e1e570583f7ac0314ecf98294c1b6aaebd38", size = 7859748, upload-time = "2026-03-28T17:19:40.6Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f1/4c/a164164834f03924d9a29dc3acd9e7ee58f95857e0b467f6d04298594ebb/aiohttp-3.13.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5b6073099fb654e0a068ae678b10feff95c5cae95bbfcbfa7af669d361a8aa6b", size = 746051, upload-time = "2026-01-03T17:29:43.287Z" },
-    { url = "https://files.pythonhosted.org/packages/82/71/d5c31390d18d4f58115037c432b7e0348c60f6f53b727cad33172144a112/aiohttp-3.13.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cb93e166e6c28716c8c6aeb5f99dfb6d5ccf482d29fe9bf9a794110e6d0ab64", size = 499234, upload-time = "2026-01-03T17:29:44.822Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/c9/741f8ac91e14b1d2e7100690425a5b2b919a87a5075406582991fb7de920/aiohttp-3.13.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28e027cf2f6b641693a09f631759b4d9ce9165099d2b5d92af9bd4e197690eea", size = 494979, upload-time = "2026-01-03T17:29:46.405Z" },
-    { url = "https://files.pythonhosted.org/packages/75/b5/31d4d2e802dfd59f74ed47eba48869c1c21552c586d5e81a9d0d5c2ad640/aiohttp-3.13.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b61b7169ababd7802f9568ed96142616a9118dd2be0d1866e920e77ec8fa92a", size = 1748297, upload-time = "2026-01-03T17:29:48.083Z" },
-    { url = "https://files.pythonhosted.org/packages/1a/3e/eefad0ad42959f226bb79664826883f2687d602a9ae2941a18e0484a74d3/aiohttp-3.13.3-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:80dd4c21b0f6237676449c6baaa1039abae86b91636b6c91a7f8e61c87f89540", size = 1707172, upload-time = "2026-01-03T17:29:49.648Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/3a/54a64299fac2891c346cdcf2aa6803f994a2e4beeaf2e5a09dcc54acc842/aiohttp-3.13.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:65d2ccb7eabee90ce0503c17716fc77226be026dcc3e65cce859a30db715025b", size = 1805405, upload-time = "2026-01-03T17:29:51.244Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/70/ddc1b7169cf64075e864f64595a14b147a895a868394a48f6a8031979038/aiohttp-3.13.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5b179331a481cb5529fca8b432d8d3c7001cb217513c94cd72d668d1248688a3", size = 1899449, upload-time = "2026-01-03T17:29:53.938Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/7e/6815aab7d3a56610891c76ef79095677b8b5be6646aaf00f69b221765021/aiohttp-3.13.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d4c940f02f49483b18b079d1c27ab948721852b281f8b015c058100e9421dd1", size = 1748444, upload-time = "2026-01-03T17:29:55.484Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/f2/073b145c4100da5511f457dc0f7558e99b2987cf72600d42b559db856fbc/aiohttp-3.13.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f9444f105664c4ce47a2a7171a2418bce5b7bae45fb610f4e2c36045d85911d3", size = 1606038, upload-time = "2026-01-03T17:29:57.179Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/c1/778d011920cae03ae01424ec202c513dc69243cf2db303965615b81deeea/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:694976222c711d1d00ba131904beb60534f93966562f64440d0c9d41b8cdb440", size = 1724156, upload-time = "2026-01-03T17:29:58.914Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/cb/3419eabf4ec1e9ec6f242c32b689248365a1cf621891f6f0386632525494/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f33ed1a2bf1997a36661874b017f5c4b760f41266341af36febaf271d179f6d7", size = 1722340, upload-time = "2026-01-03T17:30:01.962Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/e5/76cf77bdbc435bf233c1f114edad39ed4177ccbfab7c329482b179cff4f4/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e636b3c5f61da31a92bf0d91da83e58fdfa96f178ba682f11d24f31944cdd28c", size = 1783041, upload-time = "2026-01-03T17:30:03.609Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/d4/dd1ca234c794fd29c057ce8c0566b8ef7fd6a51069de5f06fa84b9a1971c/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5d2d94f1f5fcbe40838ac51a6ab5704a6f9ea42e72ceda48de5e6b898521da51", size = 1596024, upload-time = "2026-01-03T17:30:05.132Z" },
-    { url = "https://files.pythonhosted.org/packages/55/58/4345b5f26661a6180afa686c473620c30a66afdf120ed3dd545bbc809e85/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2be0e9ccf23e8a94f6f0650ce06042cefc6ac703d0d7ab6c7a917289f2539ad4", size = 1804590, upload-time = "2026-01-03T17:30:07.135Z" },
-    { url = "https://files.pythonhosted.org/packages/7b/06/05950619af6c2df7e0a431d889ba2813c9f0129cec76f663e547a5ad56f2/aiohttp-3.13.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9af5e68ee47d6534d36791bbe9b646d2a7c7deb6fc24d7943628edfbb3581f29", size = 1740355, upload-time = "2026-01-03T17:30:09.083Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/80/958f16de79ba0422d7c1e284b2abd0c84bc03394fbe631d0a39ffa10e1eb/aiohttp-3.13.3-cp311-cp311-win32.whl", hash = "sha256:a2212ad43c0833a873d0fb3c63fa1bacedd4cf6af2fee62bf4b739ceec3ab239", size = 433701, upload-time = "2026-01-03T17:30:10.869Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/f2/27cdf04c9851712d6c1b99df6821a6623c3c9e55956d4b1e318c337b5a48/aiohttp-3.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:642f752c3eb117b105acbd87e2c143de710987e09860d674e068c4c2c441034f", size = 457678, upload-time = "2026-01-03T17:30:12.719Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732, upload-time = "2026-01-03T17:30:14.23Z" },
-    { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293, upload-time = "2026-01-03T17:30:15.96Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533, upload-time = "2026-01-03T17:30:17.431Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839, upload-time = "2026-01-03T17:30:19.422Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932, upload-time = "2026-01-03T17:30:21.756Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", size = 1771906, upload-time = "2026-01-03T17:30:23.932Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020, upload-time = "2026-01-03T17:30:26Z" },
-    { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181, upload-time = "2026-01-03T17:30:27.554Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794, upload-time = "2026-01-03T17:30:29.254Z" },
-    { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900, upload-time = "2026-01-03T17:30:31.033Z" },
-    { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239, upload-time = "2026-01-03T17:30:32.703Z" },
-    { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 1740527, upload-time = "2026-01-03T17:30:34.695Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489, upload-time = "2026-01-03T17:30:36.864Z" },
-    { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 1767852, upload-time = "2026-01-03T17:30:39.433Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379, upload-time = "2026-01-03T17:30:41.081Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253, upload-time = "2026-01-03T17:30:42.644Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload-time = "2026-01-03T17:30:44.195Z" },
-    { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190, upload-time = "2026-01-03T17:30:45.832Z" },
-    { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783, upload-time = "2026-01-03T17:30:47.466Z" },
-    { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704, upload-time = "2026-01-03T17:30:49.373Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652, upload-time = "2026-01-03T17:30:50.974Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014, upload-time = "2026-01-03T17:30:52.729Z" },
-    { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777, upload-time = "2026-01-03T17:30:54.537Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276, upload-time = "2026-01-03T17:30:56.512Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131, upload-time = "2026-01-03T17:30:58.256Z" },
-    { url = "https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863, upload-time = "2026-01-03T17:31:00.445Z" },
-    { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793, upload-time = "2026-01-03T17:31:03.024Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676, upload-time = "2026-01-03T17:31:04.842Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217, upload-time = "2026-01-03T17:31:06.868Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303, upload-time = "2026-01-03T17:31:08.958Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673, upload-time = "2026-01-03T17:31:10.676Z" },
-    { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120, upload-time = "2026-01-03T17:31:12.575Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/9f/d643bb3c5fb99547323e635e251c609fbbc660d983144cfebec529e09264/aiohttp-3.13.3-cp313-cp313-win32.whl", hash = "sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf", size = 427383, upload-time = "2026-01-03T17:31:14.382Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899, upload-time = "2026-01-03T17:31:15.958Z" },
-    { url = "https://files.pythonhosted.org/packages/99/36/5b6514a9f5d66f4e2597e40dea2e3db271e023eb7a5d22defe96ba560996/aiohttp-3.13.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808", size = 737238, upload-time = "2026-01-03T17:31:17.909Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/49/459327f0d5bcd8c6c9ca69e60fdeebc3622861e696490d8674a6d0cb90a6/aiohttp-3.13.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415", size = 492292, upload-time = "2026-01-03T17:31:19.919Z" },
-    { url = "https://files.pythonhosted.org/packages/e8/0b/b97660c5fd05d3495b4eb27f2d0ef18dc1dc4eff7511a9bf371397ff0264/aiohttp-3.13.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f", size = 493021, upload-time = "2026-01-03T17:31:21.636Z" },
-    { url = "https://files.pythonhosted.org/packages/54/d4/438efabdf74e30aeceb890c3290bbaa449780583b1270b00661126b8aae4/aiohttp-3.13.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6", size = 1717263, upload-time = "2026-01-03T17:31:23.296Z" },
-    { url = "https://files.pythonhosted.org/packages/71/f2/7bddc7fd612367d1459c5bcf598a9e8f7092d6580d98de0e057eb42697ad/aiohttp-3.13.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687", size = 1669107, upload-time = "2026-01-03T17:31:25.334Z" },
-    { url = "https://files.pythonhosted.org/packages/00/5a/1aeaecca40e22560f97610a329e0e5efef5e0b5afdf9f857f0d93839ab2e/aiohttp-3.13.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26", size = 1760196, upload-time = "2026-01-03T17:31:27.394Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/f8/0ff6992bea7bd560fc510ea1c815f87eedd745fe035589c71ce05612a19a/aiohttp-3.13.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a", size = 1843591, upload-time = "2026-01-03T17:31:29.238Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/d1/e30e537a15f53485b61f5be525f2157da719819e8377298502aebac45536/aiohttp-3.13.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1", size = 1720277, upload-time = "2026-01-03T17:31:31.053Z" },
-    { url = "https://files.pythonhosted.org/packages/84/45/23f4c451d8192f553d38d838831ebbc156907ea6e05557f39563101b7717/aiohttp-3.13.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25", size = 1548575, upload-time = "2026-01-03T17:31:32.87Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/ed/0a42b127a43712eda7807e7892c083eadfaf8429ca8fb619662a530a3aab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603", size = 1679455, upload-time = "2026-01-03T17:31:34.76Z" },
-    { url = "https://files.pythonhosted.org/packages/2e/b5/c05f0c2b4b4fe2c9d55e73b6d3ed4fd6c9dc2684b1d81cbdf77e7fad9adb/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a", size = 1687417, upload-time = "2026-01-03T17:31:36.699Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/6b/915bc5dad66aef602b9e459b5a973529304d4e89ca86999d9d75d80cbd0b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926", size = 1729968, upload-time = "2026-01-03T17:31:38.622Z" },
-    { url = "https://files.pythonhosted.org/packages/11/3b/e84581290a9520024a08640b63d07673057aec5ca548177a82026187ba73/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba", size = 1545690, upload-time = "2026-01-03T17:31:40.57Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/04/0c3655a566c43fd647c81b895dfe361b9f9ad6d58c19309d45cff52d6c3b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c", size = 1746390, upload-time = "2026-01-03T17:31:42.857Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/53/71165b26978f719c3419381514c9690bd5980e764a09440a10bb816ea4ab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43", size = 1702188, upload-time = "2026-01-03T17:31:44.984Z" },
-    { url = "https://files.pythonhosted.org/packages/29/a7/cbe6c9e8e136314fa1980da388a59d2f35f35395948a08b6747baebb6aa6/aiohttp-3.13.3-cp314-cp314-win32.whl", hash = "sha256:b556c85915d8efaed322bf1bdae9486aa0f3f764195a0fb6ee962e5c71ef5ce1", size = 433126, upload-time = "2026-01-03T17:31:47.463Z" },
-    { url = "https://files.pythonhosted.org/packages/de/56/982704adea7d3b16614fc5936014e9af85c0e34b58f9046655817f04306e/aiohttp-3.13.3-cp314-cp314-win_amd64.whl", hash = "sha256:9bf9f7a65e7aa20dd764151fb3d616c81088f91f8df39c3893a536e279b4b984", size = 459128, upload-time = "2026-01-03T17:31:49.2Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/2a/3c79b638a9c3d4658d345339d22070241ea341ed4e07b5ac60fb0f418003/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c", size = 769512, upload-time = "2026-01-03T17:31:51.134Z" },
-    { url = "https://files.pythonhosted.org/packages/29/b9/3e5014d46c0ab0db8707e0ac2711ed28c4da0218c358a4e7c17bae0d8722/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592", size = 506444, upload-time = "2026-01-03T17:31:52.85Z" },
-    { url = "https://files.pythonhosted.org/packages/90/03/c1d4ef9a054e151cd7839cdc497f2638f00b93cbe8043983986630d7a80c/aiohttp-3.13.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f", size = 510798, upload-time = "2026-01-03T17:31:54.91Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/76/8c1e5abbfe8e127c893fe7ead569148a4d5a799f7cf958d8c09f3eedf097/aiohttp-3.13.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29", size = 1868835, upload-time = "2026-01-03T17:31:56.733Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/ac/984c5a6f74c363b01ff97adc96a3976d9c98940b8969a1881575b279ac5d/aiohttp-3.13.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc", size = 1720486, upload-time = "2026-01-03T17:31:58.65Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/9a/b7039c5f099c4eb632138728828b33428585031a1e658d693d41d07d89d1/aiohttp-3.13.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2", size = 1847951, upload-time = "2026-01-03T17:32:00.989Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/02/3bec2b9a1ba3c19ff89a43a19324202b8eb187ca1e928d8bdac9bbdddebd/aiohttp-3.13.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587", size = 1941001, upload-time = "2026-01-03T17:32:03.122Z" },
-    { url = "https://files.pythonhosted.org/packages/37/df/d879401cedeef27ac4717f6426c8c36c3091c6e9f08a9178cc87549c537f/aiohttp-3.13.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8", size = 1797246, upload-time = "2026-01-03T17:32:05.255Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/15/be122de1f67e6953add23335c8ece6d314ab67c8bebb3f181063010795a7/aiohttp-3.13.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632", size = 1627131, upload-time = "2026-01-03T17:32:07.607Z" },
-    { url = "https://files.pythonhosted.org/packages/12/12/70eedcac9134cfa3219ab7af31ea56bc877395b1ac30d65b1bc4b27d0438/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64", size = 1795196, upload-time = "2026-01-03T17:32:09.59Z" },
-    { url = "https://files.pythonhosted.org/packages/32/11/b30e1b1cd1f3054af86ebe60df96989c6a414dd87e27ad16950eee420bea/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0", size = 1782841, upload-time = "2026-01-03T17:32:11.445Z" },
-    { url = "https://files.pythonhosted.org/packages/88/0d/d98a9367b38912384a17e287850f5695c528cff0f14f791ce8ee2e4f7796/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56", size = 1795193, upload-time = "2026-01-03T17:32:13.705Z" },
-    { url = "https://files.pythonhosted.org/packages/43/a5/a2dfd1f5ff5581632c7f6a30e1744deda03808974f94f6534241ef60c751/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72", size = 1621979, upload-time = "2026-01-03T17:32:15.965Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/f0/12973c382ae7c1cccbc4417e129c5bf54c374dfb85af70893646e1f0e749/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df", size = 1822193, upload-time = "2026-01-03T17:32:18.219Z" },
-    { url = "https://files.pythonhosted.org/packages/3c/5f/24155e30ba7f8c96918af1350eb0663e2430aad9e001c0489d89cd708ab1/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa", size = 1769801, upload-time = "2026-01-03T17:32:20.25Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/f8/7314031ff5c10e6ece114da79b338ec17eeff3a079e53151f7e9f43c4723/aiohttp-3.13.3-cp314-cp314t-win32.whl", hash = "sha256:2e41b18a58da1e474a057b3d35248d8320029f61d70a37629535b16a0c8f3767", size = 466523, upload-time = "2026-01-03T17:32:22.215Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/63/278a98c715ae467624eafe375542d8ba9b4383a016df8fdefe0ae28382a7/aiohttp-3.13.3-cp314-cp314t-win_amd64.whl", hash = "sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344", size = 499694, upload-time = "2026-01-03T17:32:24.546Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/7e/cb94129302d78c46662b47f9897d642fd0b33bdfef4b73b20c6ced35aa4c/aiohttp-3.13.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8ea0c64d1bcbf201b285c2246c51a0c035ba3bbd306640007bc5844a3b4658c1", size = 760027, upload-time = "2026-03-28T17:15:33.022Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/cd/2db3c9397c3bd24216b203dd739945b04f8b87bb036c640da7ddb63c75ef/aiohttp-3.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6f742e1fa45c0ed522b00ede565e18f97e4cf8d1883a712ac42d0339dfb0cce7", size = 508325, upload-time = "2026-03-28T17:15:34.714Z" },
+    { url = "https://files.pythonhosted.org/packages/36/a3/d28b2722ec13107f2e37a86b8a169897308bab6a3b9e071ecead9d67bd9b/aiohttp-3.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dcfb50ee25b3b7a1222a9123be1f9f89e56e67636b561441f0b304e25aaef8f", size = 502402, upload-time = "2026-03-28T17:15:36.409Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/d6/acd47b5f17c4430e555590990a4746efbcb2079909bb865516892bf85f37/aiohttp-3.13.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3262386c4ff370849863ea93b9ea60fd59c6cf56bf8f93beac625cf4d677c04d", size = 1771224, upload-time = "2026-03-28T17:15:38.223Z" },
+    { url = "https://files.pythonhosted.org/packages/98/af/af6e20113ba6a48fd1cd9e5832c4851e7613ef50c7619acdaee6ec5f1aff/aiohttp-3.13.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:473bb5aa4218dd254e9ae4834f20e31f5a0083064ac0136a01a62ddbae2eaa42", size = 1731530, upload-time = "2026-03-28T17:15:39.988Z" },
+    { url = "https://files.pythonhosted.org/packages/81/16/78a2f5d9c124ad05d5ce59a9af94214b6466c3491a25fb70760e98e9f762/aiohttp-3.13.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e56423766399b4c77b965f6aaab6c9546617b8994a956821cc507d00b91d978c", size = 1827925, upload-time = "2026-03-28T17:15:41.944Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/1f/79acf0974ced805e0e70027389fccbb7d728e6f30fcac725fb1071e63075/aiohttp-3.13.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8af249343fafd5ad90366a16d230fc265cf1149f26075dc9fe93cfd7c7173942", size = 1923579, upload-time = "2026-03-28T17:15:44.071Z" },
+    { url = "https://files.pythonhosted.org/packages/af/53/29f9e2054ea6900413f3b4c3eb9d8331f60678ec855f13ba8714c47fd48d/aiohttp-3.13.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bc0a5cf4f10ef5a2c94fdde488734b582a3a7a000b131263e27c9295bd682d9", size = 1767655, upload-time = "2026-03-28T17:15:45.911Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/57/462fe1d3da08109ba4aa8590e7aed57c059af2a7e80ec21f4bac5cfe1094/aiohttp-3.13.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5c7ff1028e3c9fc5123a865ce17df1cb6424d180c503b8517afbe89aa566e6be", size = 1630439, upload-time = "2026-03-28T17:15:48.11Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/4b/4813344aacdb8127263e3eec343d24e973421143826364fa9fc847f6283f/aiohttp-3.13.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ba5cf98b5dcb9bddd857da6713a503fa6d341043258ca823f0f5ab7ab4a94ee8", size = 1745557, upload-time = "2026-03-28T17:15:50.13Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/01/1ef1adae1454341ec50a789f03cfafe4c4ac9c003f6a64515ecd32fe4210/aiohttp-3.13.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:d85965d3ba21ee4999e83e992fecb86c4614d6920e40705501c0a1f80a583c12", size = 1741796, upload-time = "2026-03-28T17:15:52.351Z" },
+    { url = "https://files.pythonhosted.org/packages/22/04/8cdd99af988d2aa6922714d957d21383c559835cbd43fbf5a47ddf2e0f05/aiohttp-3.13.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:49f0b18a9b05d79f6f37ddd567695943fcefb834ef480f17a4211987302b2dc7", size = 1805312, upload-time = "2026-03-28T17:15:54.407Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/7f/b48d5577338d4b25bbdbae35c75dbfd0493cb8886dc586fbfb2e90862239/aiohttp-3.13.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7f78cb080c86fbf765920e5f1ef35af3f24ec4314d6675d0a21eaf41f6f2679c", size = 1621751, upload-time = "2026-03-28T17:15:56.564Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/89/4eecad8c1858e6d0893c05929e22343e0ebe3aec29a8a399c65c3cc38311/aiohttp-3.13.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:67a3ec705534a614b68bbf1c70efa777a21c3da3895d1c44510a41f5a7ae0453", size = 1826073, upload-time = "2026-03-28T17:15:58.489Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/5c/9dc8293ed31b46c39c9c513ac7ca152b3c3d38e0ea111a530ad12001b827/aiohttp-3.13.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d6630ec917e85c5356b2295744c8a97d40f007f96a1c76bf1928dc2e27465393", size = 1760083, upload-time = "2026-03-28T17:16:00.677Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/19/8bbf6a4994205d96831f97b7d21a0feed120136e6267b5b22d229c6dc4dc/aiohttp-3.13.4-cp311-cp311-win32.whl", hash = "sha256:54049021bc626f53a5394c29e8c444f726ee5a14b6e89e0ad118315b1f90f5e3", size = 439690, upload-time = "2026-03-28T17:16:02.902Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/f5/ac409ecd1007528d15c3e8c3a57d34f334c70d76cfb7128a28cffdebd4c1/aiohttp-3.13.4-cp311-cp311-win_amd64.whl", hash = "sha256:c033f2bc964156030772d31cbf7e5defea181238ce1f87b9455b786de7d30145", size = 463824, upload-time = "2026-03-28T17:16:05.058Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/bd/ede278648914cabbabfdf95e436679b5d4156e417896a9b9f4587169e376/aiohttp-3.13.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ee62d4471ce86b108b19c3364db4b91180d13fe3510144872d6bad5401957360", size = 752158, upload-time = "2026-03-28T17:16:06.901Z" },
+    { url = "https://files.pythonhosted.org/packages/90/de/581c053253c07b480b03785196ca5335e3c606a37dc73e95f6527f1591fe/aiohttp-3.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c0fd8f41b54b58636402eb493afd512c23580456f022c1ba2db0f810c959ed0d", size = 501037, upload-time = "2026-03-28T17:16:08.82Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/f9/a5ede193c08f13cc42c0a5b50d1e246ecee9115e4cf6e900d8dbd8fd6acb/aiohttp-3.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4baa48ce49efd82d6b1a0be12d6a36b35e5594d1dd42f8bfba96ea9f8678b88c", size = 501556, upload-time = "2026-03-28T17:16:10.63Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/10/88ff67cd48a6ec36335b63a640abe86135791544863e0cfe1f065d6cef7a/aiohttp-3.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d738ebab9f71ee652d9dbd0211057690022201b11197f9a7324fd4dba128aa97", size = 1757314, upload-time = "2026-03-28T17:16:12.498Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/15/fdb90a5cf5a1f52845c276e76298c75fbbcc0ac2b4a86551906d54529965/aiohttp-3.13.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0ce692c3468fa831af7dceed52edf51ac348cebfc8d3feb935927b63bd3e8576", size = 1731819, upload-time = "2026-03-28T17:16:14.558Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/df/28146785a007f7820416be05d4f28cc207493efd1e8c6c1068e9bdc29198/aiohttp-3.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8e08abcfe752a454d2cb89ff0c08f2d1ecd057ae3e8cc6d84638de853530ebab", size = 1793279, upload-time = "2026-03-28T17:16:16.594Z" },
+    { url = "https://files.pythonhosted.org/packages/10/47/689c743abf62ea7a77774d5722f220e2c912a77d65d368b884d9779ef41b/aiohttp-3.13.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5977f701b3fff36367a11087f30ea73c212e686d41cd363c50c022d48b011d8d", size = 1891082, upload-time = "2026-03-28T17:16:18.71Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/b6/f7f4f318c7e58c23b761c9b13b9a3c9b394e0f9d5d76fbc6622fa98509f6/aiohttp-3.13.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:54203e10405c06f8b6020bd1e076ae0fe6c194adcee12a5a78af3ffa3c57025e", size = 1773938, upload-time = "2026-03-28T17:16:21.125Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/06/f207cb3121852c989586a6fc16ff854c4fcc8651b86c5d3bd1fc83057650/aiohttp-3.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:358a6af0145bc4dda037f13167bef3cce54b132087acc4c295c739d05d16b1c3", size = 1579548, upload-time = "2026-03-28T17:16:23.588Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/58/e1289661a32161e24c1fe479711d783067210d266842523752869cc1d9c2/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:898ea1850656d7d61832ef06aa9846ab3ddb1621b74f46de78fbc5e1a586ba83", size = 1714669, upload-time = "2026-03-28T17:16:25.713Z" },
+    { url = "https://files.pythonhosted.org/packages/96/0a/3e86d039438a74a86e6a948a9119b22540bae037d6ba317a042ae3c22711/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7bc30cceb710cf6a44e9617e43eebb6e3e43ad855a34da7b4b6a73537d8a6763", size = 1754175, upload-time = "2026-03-28T17:16:28.18Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/30/e717fc5df83133ba467a560b6d8ef20197037b4bb5d7075b90037de1018e/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4a31c0c587a8a038f19a4c7e60654a6c899c9de9174593a13e7cc6e15ff271f9", size = 1762049, upload-time = "2026-03-28T17:16:30.941Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/28/8f7a2d4492e336e40005151bdd94baf344880a4707573378579f833a64c1/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2062f675f3fe6e06d6113eb74a157fb9df58953ffed0cdb4182554b116545758", size = 1570861, upload-time = "2026-03-28T17:16:32.953Z" },
+    { url = "https://files.pythonhosted.org/packages/78/45/12e1a3d0645968b1c38de4b23fdf270b8637735ea057d4f84482ff918ad9/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d1ba8afb847ff80626d5e408c1fdc99f942acc877d0702fe137015903a220a9", size = 1790003, upload-time = "2026-03-28T17:16:35.468Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/0f/60374e18d590de16dcb39d6ff62f39c096c1b958e6f37727b5870026ea30/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b08149419994cdd4d5eecf7fd4bc5986b5a9380285bcd01ab4c0d6bfca47b79d", size = 1737289, upload-time = "2026-03-28T17:16:38.187Z" },
+    { url = "https://files.pythonhosted.org/packages/02/bf/535e58d886cfbc40a8b0013c974afad24ef7632d645bca0b678b70033a60/aiohttp-3.13.4-cp312-cp312-win32.whl", hash = "sha256:fc432f6a2c4f720180959bc19aa37259651c1a4ed8af8afc84dd41c60f15f791", size = 434185, upload-time = "2026-03-28T17:16:40.735Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/1a/d92e3325134ebfff6f4069f270d3aac770d63320bd1fcd0eca023e74d9a8/aiohttp-3.13.4-cp312-cp312-win_amd64.whl", hash = "sha256:6148c9ae97a3e8bff9a1fc9c757fa164116f86c100468339730e717590a3fb77", size = 461285, upload-time = "2026-03-28T17:16:42.713Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/ac/892f4162df9b115b4758d615f32ec63d00f3084c705ff5526630887b9b42/aiohttp-3.13.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:63dd5e5b1e43b8fb1e91b79b7ceba1feba588b317d1edff385084fcc7a0a4538", size = 745744, upload-time = "2026-03-28T17:16:44.67Z" },
+    { url = "https://files.pythonhosted.org/packages/97/a9/c5b87e4443a2f0ea88cb3000c93a8fdad1ee63bffc9ded8d8c8e0d66efc6/aiohttp-3.13.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:746ac3cc00b5baea424dacddea3ec2c2702f9590de27d837aa67004db1eebc6e", size = 498178, upload-time = "2026-03-28T17:16:46.766Z" },
+    { url = "https://files.pythonhosted.org/packages/94/42/07e1b543a61250783650df13da8ddcdc0d0a5538b2bd15cef6e042aefc61/aiohttp-3.13.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bda8f16ea99d6a6705e5946732e48487a448be874e54a4f73d514660ff7c05d3", size = 498331, upload-time = "2026-03-28T17:16:48.9Z" },
+    { url = "https://files.pythonhosted.org/packages/20/d6/492f46bf0328534124772d0cf58570acae5b286ea25006900650f69dae0e/aiohttp-3.13.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4b061e7b5f840391e3f64d0ddf672973e45c4cfff7a0feea425ea24e51530fc2", size = 1744414, upload-time = "2026-03-28T17:16:50.968Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/4d/e02627b2683f68051246215d2d62b2d2f249ff7a285e7a858dc47d6b6a14/aiohttp-3.13.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b252e8d5cd66184b570d0d010de742736e8a4fab22c58299772b0c5a466d4b21", size = 1719226, upload-time = "2026-03-28T17:16:53.173Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/6c/5d0a3394dd2b9f9aeba6e1b6065d0439e4b75d41f1fb09a3ec010b43552b/aiohttp-3.13.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20af8aad61d1803ff11152a26146d8d81c266aa8c5aa9b4504432abb965c36a0", size = 1782110, upload-time = "2026-03-28T17:16:55.362Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/2d/c20791e3437700a7441a7edfb59731150322424f5aadf635602d1d326101/aiohttp-3.13.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:13a5cc924b59859ad2adb1478e31f410a7ed46e92a2a619d6d1dd1a63c1a855e", size = 1884809, upload-time = "2026-03-28T17:16:57.734Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/94/d99dbfbd1924a87ef643833932eb2a3d9e5eee87656efea7d78058539eff/aiohttp-3.13.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:534913dfb0a644d537aebb4123e7d466d94e3be5549205e6a31f72368980a81a", size = 1764938, upload-time = "2026-03-28T17:17:00.221Z" },
+    { url = "https://files.pythonhosted.org/packages/49/61/3ce326a1538781deb89f6cf5e094e2029cd308ed1e21b2ba2278b08426f6/aiohttp-3.13.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:320e40192a2dcc1cf4b5576936e9652981ab596bf81eb309535db7e2f5b5672f", size = 1570697, upload-time = "2026-03-28T17:17:02.985Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/77/4ab5a546857bb3028fbaf34d6eea180267bdab022ee8b1168b1fcde4bfdd/aiohttp-3.13.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9e587fcfce2bcf06526a43cb705bdee21ac089096f2e271d75de9c339db3100c", size = 1702258, upload-time = "2026-03-28T17:17:05.28Z" },
+    { url = "https://files.pythonhosted.org/packages/79/63/d8f29021e39bc5af8e5d5e9da1b07976fb9846487a784e11e4f4eeda4666/aiohttp-3.13.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:9eb9c2eea7278206b5c6c1441fdd9dc420c278ead3f3b2cc87f9b693698cc500", size = 1740287, upload-time = "2026-03-28T17:17:07.712Z" },
+    { url = "https://files.pythonhosted.org/packages/55/3a/cbc6b3b124859a11bc8055d3682c26999b393531ef926754a3445b99dfef/aiohttp-3.13.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:29be00c51972b04bf9d5c8f2d7f7314f48f96070ca40a873a53056e652e805f7", size = 1753011, upload-time = "2026-03-28T17:17:10.053Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/30/836278675205d58c1368b21520eab9572457cf19afd23759216c04483048/aiohttp-3.13.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:90c06228a6c3a7c9f776fe4fc0b7ff647fffd3bed93779a6913c804ae00c1073", size = 1566359, upload-time = "2026-03-28T17:17:12.433Z" },
+    { url = "https://files.pythonhosted.org/packages/50/b4/8032cc9b82d17e4277704ba30509eaccb39329dc18d6a35f05e424439e32/aiohttp-3.13.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:a533ec132f05fd9a1d959e7f34184cd7d5e8511584848dab85faefbaac573069", size = 1785537, upload-time = "2026-03-28T17:17:14.721Z" },
+    { url = "https://files.pythonhosted.org/packages/17/7d/5873e98230bde59f493bf1f7c3e327486a4b5653fa401144704df5d00211/aiohttp-3.13.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1c946f10f413836f82ea4cfb90200d2a59578c549f00857e03111cf45ad01ca5", size = 1740752, upload-time = "2026-03-28T17:17:17.387Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/f2/13e46e0df051494d7d3c68b7f72d071f48c384c12716fc294f75d5b1a064/aiohttp-3.13.4-cp313-cp313-win32.whl", hash = "sha256:48708e2706106da6967eff5908c78ca3943f005ed6bcb75da2a7e4da94ef8c70", size = 433187, upload-time = "2026-03-28T17:17:19.523Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/c0/649856ee655a843c8f8664592cfccb73ac80ede6a8c8db33a25d810c12db/aiohttp-3.13.4-cp313-cp313-win_amd64.whl", hash = "sha256:74a2eb058da44fa3a877a49e2095b591d4913308bb424c418b77beb160c55ce3", size = 459778, upload-time = "2026-03-28T17:17:21.964Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/29/6657cc37ae04cacc2dbf53fb730a06b6091cc4cbe745028e047c53e6d840/aiohttp-3.13.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:e0a2c961fc92abeff61d6444f2ce6ad35bb982db9fc8ff8a47455beacf454a57", size = 749363, upload-time = "2026-03-28T17:17:24.044Z" },
+    { url = "https://files.pythonhosted.org/packages/90/7f/30ccdf67ca3d24b610067dc63d64dcb91e5d88e27667811640644aa4a85d/aiohttp-3.13.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:153274535985a0ff2bff1fb6c104ed547cec898a09213d21b0f791a44b14d933", size = 499317, upload-time = "2026-03-28T17:17:26.199Z" },
+    { url = "https://files.pythonhosted.org/packages/93/13/e372dd4e68ad04ee25dafb050c7f98b0d91ea643f7352757e87231102555/aiohttp-3.13.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:351f3171e2458da3d731ce83f9e6b9619e325c45cbd534c7759750cabf453ad7", size = 500477, upload-time = "2026-03-28T17:17:28.279Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/fe/ee6298e8e586096fb6f5eddd31393d8544f33ae0792c71ecbb4c2bef98ac/aiohttp-3.13.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f989ac8bc5595ff761a5ccd32bdb0768a117f36dd1504b1c2c074ed5d3f4df9c", size = 1737227, upload-time = "2026-03-28T17:17:30.587Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/b9/a7a0463a09e1a3fe35100f74324f23644bfc3383ac5fd5effe0722a5f0b7/aiohttp-3.13.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d36fc1709110ec1e87a229b201dd3ddc32aa01e98e7868083a794609b081c349", size = 1694036, upload-time = "2026-03-28T17:17:33.29Z" },
+    { url = "https://files.pythonhosted.org/packages/57/7c/8972ae3fb7be00a91aee6b644b2a6a909aedb2c425269a3bfd90115e6f8f/aiohttp-3.13.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42adaeea83cbdf069ab94f5103ce0787c21fb1a0153270da76b59d5578302329", size = 1786814, upload-time = "2026-03-28T17:17:36.035Z" },
+    { url = "https://files.pythonhosted.org/packages/93/01/c81e97e85c774decbaf0d577de7d848934e8166a3a14ad9f8aa5be329d28/aiohttp-3.13.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:92deb95469928cc41fd4b42a95d8012fa6df93f6b1c0a83af0ffbc4a5e218cde", size = 1866676, upload-time = "2026-03-28T17:17:38.441Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/5f/5b46fe8694a639ddea2cd035bf5729e4677ea882cb251396637e2ef1590d/aiohttp-3.13.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0c0c7c07c4257ef3a1df355f840bc62d133bcdef5c1c5ba75add3c08553e2eed", size = 1740842, upload-time = "2026-03-28T17:17:40.783Z" },
+    { url = "https://files.pythonhosted.org/packages/20/a2/0d4b03d011cca6b6b0acba8433193c1e484efa8d705ea58295590fe24203/aiohttp-3.13.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f062c45de8a1098cb137a1898819796a2491aec4e637a06b03f149315dff4d8f", size = 1566508, upload-time = "2026-03-28T17:17:43.235Z" },
+    { url = "https://files.pythonhosted.org/packages/98/17/e689fd500da52488ec5f889effd6404dece6a59de301e380f3c64f167beb/aiohttp-3.13.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:76093107c531517001114f0ebdb4f46858ce818590363e3e99a4a2280334454a", size = 1700569, upload-time = "2026-03-28T17:17:46.165Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/0d/66402894dbcf470ef7db99449e436105ea862c24f7ea4c95c683e635af35/aiohttp-3.13.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:6f6ec32162d293b82f8b63a16edc80769662fbd5ae6fbd4936d3206a2c2cc63b", size = 1707407, upload-time = "2026-03-28T17:17:48.825Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/eb/af0ab1a3650092cbd8e14ef29e4ab0209e1460e1c299996c3f8288b3f1ff/aiohttp-3.13.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5903e2db3d202a00ad9f0ec35a122c005e85d90c9836ab4cda628f01edf425e2", size = 1752214, upload-time = "2026-03-28T17:17:51.206Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/bf/72326f8a98e4c666f292f03c385545963cc65e358835d2a7375037a97b57/aiohttp-3.13.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2d5bea57be7aca98dbbac8da046d99b5557c5cf4e28538c4c786313078aca09e", size = 1562162, upload-time = "2026-03-28T17:17:53.634Z" },
+    { url = "https://files.pythonhosted.org/packages/67/9f/13b72435f99151dd9a5469c96b3b5f86aa29b7e785ca7f35cf5e538f74c0/aiohttp-3.13.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:bcf0c9902085976edc0232b75006ef38f89686901249ce14226b6877f88464fb", size = 1768904, upload-time = "2026-03-28T17:17:55.991Z" },
+    { url = "https://files.pythonhosted.org/packages/18/bc/28d4970e7d5452ac7776cdb5431a1164a0d9cf8bd2fffd67b4fb463aa56d/aiohttp-3.13.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3295f98bfeed2e867cab588f2a146a9db37a85e3ae9062abf46ba062bd29165", size = 1723378, upload-time = "2026-03-28T17:17:58.348Z" },
+    { url = "https://files.pythonhosted.org/packages/53/74/b32458ca1a7f34d65bdee7aef2036adbe0438123d3d53e2b083c453c24dd/aiohttp-3.13.4-cp314-cp314-win32.whl", hash = "sha256:a598a5c5767e1369d8f5b08695cab1d8160040f796c4416af76fd773d229b3c9", size = 438711, upload-time = "2026-03-28T17:18:00.728Z" },
+    { url = "https://files.pythonhosted.org/packages/40/b2/54b487316c2df3e03a8f3435e9636f8a81a42a69d942164830d193beb56a/aiohttp-3.13.4-cp314-cp314-win_amd64.whl", hash = "sha256:c555db4bc7a264bead5a7d63d92d41a1122fcd39cc62a4db815f45ad46f9c2c8", size = 464977, upload-time = "2026-03-28T17:18:03.367Z" },
+    { url = "https://files.pythonhosted.org/packages/47/fb/e41b63c6ce71b07a59243bb8f3b457ee0c3402a619acb9d2c0d21ef0e647/aiohttp-3.13.4-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:45abbbf09a129825d13c18c7d3182fecd46d9da3cfc383756145394013604ac1", size = 781549, upload-time = "2026-03-28T17:18:05.779Z" },
+    { url = "https://files.pythonhosted.org/packages/97/53/532b8d28df1e17e44c4d9a9368b78dcb6bf0b51037522136eced13afa9e8/aiohttp-3.13.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:74c80b2bc2c2adb7b3d1941b2b60701ee2af8296fc8aad8b8bc48bc25767266c", size = 514383, upload-time = "2026-03-28T17:18:08.096Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/1f/62e5d400603e8468cd635812d99cb81cfdc08127a3dc474c647615f31339/aiohttp-3.13.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c97989ae40a9746650fa196894f317dafc12227c808c774929dda0ff873a5954", size = 518304, upload-time = "2026-03-28T17:18:10.642Z" },
+    { url = "https://files.pythonhosted.org/packages/90/57/2326b37b10896447e3c6e0cbef4fe2486d30913639a5cfd1332b5d870f82/aiohttp-3.13.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dae86be9811493f9990ef44fff1685f5c1a3192e9061a71a109d527944eed551", size = 1893433, upload-time = "2026-03-28T17:18:13.121Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/b4/a24d82112c304afdb650167ef2fe190957d81cbddac7460bedd245f765aa/aiohttp-3.13.4-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:1db491abe852ca2fa6cc48a3341985b0174b3741838e1341b82ac82c8bd9e871", size = 1755901, upload-time = "2026-03-28T17:18:16.21Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/2d/0883ef9d878d7846287f036c162a951968f22aabeef3ac97b0bea6f76d5d/aiohttp-3.13.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0e5d701c0aad02a7dce72eef6b93226cf3734330f1a31d69ebbf69f33b86666e", size = 1876093, upload-time = "2026-03-28T17:18:18.703Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/52/9204bb59c014869b71971addad6778f005daa72a96eed652c496789d7468/aiohttp-3.13.4-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8ac32a189081ae0a10ba18993f10f338ec94341f0d5df8fff348043962f3c6f8", size = 1970815, upload-time = "2026-03-28T17:18:21.858Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/b5/e4eb20275a866dde0f570f411b36c6b48f7b53edfe4f4071aa1b0728098a/aiohttp-3.13.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98e968cdaba43e45c73c3f306fca418c8009a957733bac85937c9f9cf3f4de27", size = 1816223, upload-time = "2026-03-28T17:18:24.729Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/23/e98075c5bb146aa61a1239ee1ac7714c85e814838d6cebbe37d3fe19214a/aiohttp-3.13.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca114790c9144c335d538852612d3e43ea0f075288f4849cf4b05d6cd2238ce7", size = 1649145, upload-time = "2026-03-28T17:18:27.269Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/c1/7bad8be33bb06c2bb224b6468874346026092762cbec388c3bdb65a368ee/aiohttp-3.13.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ea2e071661ba9cfe11eabbc81ac5376eaeb3061f6e72ec4cc86d7cdd1ffbdbbb", size = 1816562, upload-time = "2026-03-28T17:18:29.847Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/10/c00323348695e9a5e316825969c88463dcc24c7e9d443244b8a2c9cf2eae/aiohttp-3.13.4-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:34e89912b6c20e0fd80e07fa401fd218a410aa1ce9f1c2f1dad6db1bd0ce0927", size = 1800333, upload-time = "2026-03-28T17:18:32.269Z" },
+    { url = "https://files.pythonhosted.org/packages/84/43/9b2147a1df3559f49bd723e22905b46a46c068a53adb54abdca32c4de180/aiohttp-3.13.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0e217cf9f6a42908c52b46e42c568bd57adc39c9286ced31aaace614b6087965", size = 1820617, upload-time = "2026-03-28T17:18:35.238Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/7f/b3481a81e7a586d02e99387b18c6dafff41285f6efd3daa2124c01f87eae/aiohttp-3.13.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:0c296f1221e21ba979f5ac1964c3b78cfde15c5c5f855ffd2caab337e9cd9182", size = 1643417, upload-time = "2026-03-28T17:18:37.949Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/72/07181226bc99ce1124e0f89280f5221a82d3ae6a6d9d1973ce429d48e52b/aiohttp-3.13.4-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:d99a9d168ebaffb74f36d011750e490085ac418f4db926cce3989c8fe6cb6b1b", size = 1849286, upload-time = "2026-03-28T17:18:40.534Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/e6/1b3566e103eca6da5be4ae6713e112a053725c584e96574caf117568ffef/aiohttp-3.13.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cb19177205d93b881f3f89e6081593676043a6828f59c78c17a0fd6c1fbed2ba", size = 1782635, upload-time = "2026-03-28T17:18:43.073Z" },
+    { url = "https://files.pythonhosted.org/packages/37/58/1b11c71904b8d079eb0c39fe664180dd1e14bebe5608e235d8bfbadc8929/aiohttp-3.13.4-cp314-cp314t-win32.whl", hash = "sha256:c606aa5656dab6552e52ca368e43869c916338346bfaf6304e15c58fb113ea30", size = 472537, upload-time = "2026-03-28T17:18:46.286Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/8f/87c56a1a1977d7dddea5b31e12189665a140fdb48a71e9038ff90bb564ec/aiohttp-3.13.4-cp314-cp314t-win_amd64.whl", hash = "sha256:014dcc10ec8ab8db681f0d68e939d1e9286a5aa2b993cbbdb0db130853e02144", size = 506381, upload-time = "2026-03-28T17:18:48.74Z" },
 ]
 
 [[package]]
@@ -321,7 +321,7 @@ wheels = [
 
 [[package]]
 name = "anthropic"
-version = "0.86.0"
+version = "0.87.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -333,9 +333,9 @@ dependencies = [
     { name = "sniffio" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/37/7a/8b390dc47945d3169875d342847431e5f7d5fa716b2e37494d57cfc1db10/anthropic-0.86.0.tar.gz", hash = "sha256:60023a7e879aa4fbb1fed99d487fe407b2ebf6569603e5047cfe304cebdaa0e5", size = 583820, upload-time = "2026-03-18T18:43:08.017Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/8f/3281edf7c35cbac169810e5388eb9b38678c7ea9867c2d331237bd5dff08/anthropic-0.87.0.tar.gz", hash = "sha256:098fef3753cdd3c0daa86f95efb9c8d03a798d45c5170329525bb4653f6702d0", size = 588982, upload-time = "2026-03-31T17:52:41.697Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/63/5f/67db29c6e5d16c8c9c4652d3efb934d89cb750cad201539141781d8eae14/anthropic-0.86.0-py3-none-any.whl", hash = "sha256:9d2bbd339446acce98858c5627d33056efe01f70435b22b63546fe7edae0cd57", size = 469400, upload-time = "2026-03-18T18:43:06.526Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/02/99bf351933bdea0545a2b6e2d812ed878899e9a95f618351dfa3d0de0e69/anthropic-0.87.0-py3-none-any.whl", hash = "sha256:e2669b86d42c739d3df163f873c51719552e263a3d85179297180fb4fa00a236", size = 472126, upload-time = "2026-03-31T17:52:40.174Z" },
 ]
 
 [[package]]
@@ -787,61 +787,61 @@ wheels = [
 
 [[package]]
 name = "cryptography"
-version = "46.0.5"
+version = "46.0.7"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/47/93/ac8f3d5ff04d54bc814e961a43ae5b0b146154c89c61b47bb07557679b18/cryptography-46.0.7.tar.gz", hash = "sha256:e4cfd68c5f3e0bfdad0d38e023239b96a2fe84146481852dffbcca442c245aa5", size = 750652, upload-time = "2026-04-08T01:57:54.692Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" },
-    { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" },
-    { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" },
-    { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" },
-    { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" },
-    { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" },
-    { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" },
-    { url = "https://files.pythonhosted.org/packages/00/13/3d278bfa7a15a96b9dc22db5a12ad1e48a9eb3d40e1827ef66a5df75d0d0/cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", size = 7119287, upload-time = "2026-02-10T19:17:33.801Z" },
-    { url = "https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" },
-    { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" },
-    { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" },
-    { url = "https://files.pythonhosted.org/packages/86/ef/5d00ef966ddd71ac2e6951d278884a84a40ffbd88948ef0e294b214ae9e4/cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", size = 3003637, upload-time = "2026-02-10T19:17:52.997Z" },
-    { url = "https://files.pythonhosted.org/packages/b7/57/f3f4160123da6d098db78350fdfd9705057aad21de7388eacb2401dceab9/cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", size = 3469487, upload-time = "2026-02-10T19:17:54.549Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" },
-    { url = "https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" },
-    { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" },
-    { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" },
-    { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" },
-    { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" },
-    { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" },
-    { url = "https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" },
-    { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" },
-    { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/dd/2d9fdb07cebdf3d51179730afb7d5e576153c6744c3ff8fded23030c204e/cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c", size = 3476964, upload-time = "2026-02-10T19:18:20.687Z" },
-    { url = "https://files.pythonhosted.org/packages/e9/6f/6cc6cc9955caa6eaf83660b0da2b077c7fe8ff9950a3c5e45d605038d439/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a", size = 4218321, upload-time = "2026-02-10T19:18:22.349Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/5d/c4da701939eeee699566a6c1367427ab91a8b7088cc2328c09dbee940415/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356", size = 4381786, upload-time = "2026-02-10T19:18:24.529Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/97/a538654732974a94ff96c1db621fa464f455c02d4bb7d2652f4edc21d600/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da", size = 4217990, upload-time = "2026-02-10T19:18:25.957Z" },
-    { url = "https://files.pythonhosted.org/packages/ae/11/7e500d2dd3ba891197b9efd2da5454b74336d64a7cc419aa7327ab74e5f6/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257", size = 4381252, upload-time = "2026-02-10T19:18:27.496Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/58/6b3d24e6b9bc474a2dcdee65dfd1f008867015408a271562e4b690561a4d/cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7", size = 3407605, upload-time = "2026-02-10T19:18:29.233Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/5d/4a8f770695d73be252331e60e526291e3df0c9b27556a90a6b47bccca4c2/cryptography-46.0.7-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:ea42cbe97209df307fdc3b155f1b6fa2577c0defa8f1f7d3be7d31d189108ad4", size = 7179869, upload-time = "2026-04-08T01:56:17.157Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/45/6d80dc379b0bbc1f9d1e429f42e4cb9e1d319c7a8201beffd967c516ea01/cryptography-46.0.7-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b36a4695e29fe69215d75960b22577197aca3f7a25b9cf9d165dcfe9d80bc325", size = 4275492, upload-time = "2026-04-08T01:56:19.36Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/9a/1765afe9f572e239c3469f2cb429f3ba7b31878c893b246b4b2994ffe2fe/cryptography-46.0.7-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ad9ef796328c5e3c4ceed237a183f5d41d21150f972455a9d926593a1dcb308", size = 4426670, upload-time = "2026-04-08T01:56:21.415Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/3e/af9246aaf23cd4ee060699adab1e47ced3f5f7e7a8ffdd339f817b446462/cryptography-46.0.7-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:73510b83623e080a2c35c62c15298096e2a5dc8d51c3b4e1740211839d0dea77", size = 4280275, upload-time = "2026-04-08T01:56:23.539Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/54/6bbbfc5efe86f9d71041827b793c24811a017c6ac0fd12883e4caa86b8ed/cryptography-46.0.7-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cbd5fb06b62bd0721e1170273d3f4d5a277044c47ca27ee257025146c34cbdd1", size = 4928402, upload-time = "2026-04-08T01:56:25.624Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/cf/054b9d8220f81509939599c8bdbc0c408dbd2bdd41688616a20731371fe0/cryptography-46.0.7-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef", size = 4459985, upload-time = "2026-04-08T01:56:27.309Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/46/4e4e9c6040fb01c7467d47217d2f882daddeb8828f7df800cb806d8a2288/cryptography-46.0.7-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:24402210aa54baae71d99441d15bb5a1919c195398a87b563df84468160a65de", size = 3990652, upload-time = "2026-04-08T01:56:29.095Z" },
+    { url = "https://files.pythonhosted.org/packages/36/5f/313586c3be5a2fbe87e4c9a254207b860155a8e1f3cca99f9910008e7d08/cryptography-46.0.7-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:8a469028a86f12eb7d2fe97162d0634026d92a21f3ae0ac87ed1c4a447886c83", size = 4279805, upload-time = "2026-04-08T01:56:30.928Z" },
+    { url = "https://files.pythonhosted.org/packages/69/33/60dfc4595f334a2082749673386a4d05e4f0cf4df8248e63b2c3437585f2/cryptography-46.0.7-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9694078c5d44c157ef3162e3bf3946510b857df5a3955458381d1c7cfc143ddb", size = 4892883, upload-time = "2026-04-08T01:56:32.614Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/0b/333ddab4270c4f5b972f980adef4faa66951a4aaf646ca067af597f15563/cryptography-46.0.7-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:42a1e5f98abb6391717978baf9f90dc28a743b7d9be7f0751a6f56a75d14065b", size = 4459756, upload-time = "2026-04-08T01:56:34.306Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/14/633913398b43b75f1234834170947957c6b623d1701ffc7a9600da907e89/cryptography-46.0.7-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91bbcb08347344f810cbe49065914fe048949648f6bd5c2519f34619142bbe85", size = 4410244, upload-time = "2026-04-08T01:56:35.977Z" },
+    { url = "https://files.pythonhosted.org/packages/10/f2/19ceb3b3dc14009373432af0c13f46aa08e3ce334ec6eff13492e1812ccd/cryptography-46.0.7-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5d1c02a14ceb9148cc7816249f64f623fbfee39e8c03b3650d842ad3f34d637e", size = 4674868, upload-time = "2026-04-08T01:56:38.034Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/bb/a5c213c19ee94b15dfccc48f363738633a493812687f5567addbcbba9f6f/cryptography-46.0.7-cp311-abi3-win32.whl", hash = "sha256:d23c8ca48e44ee015cd0a54aeccdf9f09004eba9fc96f38c911011d9ff1bd457", size = 3026504, upload-time = "2026-04-08T01:56:39.666Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/02/7788f9fefa1d060ca68717c3901ae7fffa21ee087a90b7f23c7a603c32ae/cryptography-46.0.7-cp311-abi3-win_amd64.whl", hash = "sha256:397655da831414d165029da9bc483bed2fe0e75dde6a1523ec2fe63f3c46046b", size = 3488363, upload-time = "2026-04-08T01:56:41.893Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/56/15619b210e689c5403bb0540e4cb7dbf11a6bf42e483b7644e471a2812b3/cryptography-46.0.7-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:d151173275e1728cf7839aaa80c34fe550c04ddb27b34f48c232193df8db5842", size = 7119671, upload-time = "2026-04-08T01:56:44Z" },
+    { url = "https://files.pythonhosted.org/packages/74/66/e3ce040721b0b5599e175ba91ab08884c75928fbeb74597dd10ef13505d2/cryptography-46.0.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:db0f493b9181c7820c8134437eb8b0b4792085d37dbb24da050476ccb664e59c", size = 4268551, upload-time = "2026-04-08T01:56:46.071Z" },
+    { url = "https://files.pythonhosted.org/packages/03/11/5e395f961d6868269835dee1bafec6a1ac176505a167f68b7d8818431068/cryptography-46.0.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ebd6daf519b9f189f85c479427bbd6e9c9037862cf8fe89ee35503bd209ed902", size = 4408887, upload-time = "2026-04-08T01:56:47.718Z" },
+    { url = "https://files.pythonhosted.org/packages/40/53/8ed1cf4c3b9c8e611e7122fb56f1c32d09e1fff0f1d77e78d9ff7c82653e/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:b7b412817be92117ec5ed95f880defe9cf18a832e8cafacf0a22337dc1981b4d", size = 4271354, upload-time = "2026-04-08T01:56:49.312Z" },
+    { url = "https://files.pythonhosted.org/packages/50/46/cf71e26025c2e767c5609162c866a78e8a2915bbcfa408b7ca495c6140c4/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:fbfd0e5f273877695cb93baf14b185f4878128b250cc9f8e617ea0c025dfb022", size = 4905845, upload-time = "2026-04-08T01:56:50.916Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/ea/01276740375bac6249d0a971ebdf6b4dc9ead0ee0a34ef3b5a88c1a9b0d4/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ffca7aa1d00cf7d6469b988c581598f2259e46215e0140af408966a24cf086ce", size = 4444641, upload-time = "2026-04-08T01:56:52.882Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/4c/7d258f169ae71230f25d9f3d06caabcff8c3baf0978e2b7d65e0acac3827/cryptography-46.0.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:60627cf07e0d9274338521205899337c5d18249db56865f943cbe753aa96f40f", size = 3967749, upload-time = "2026-04-08T01:56:54.597Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/2a/2ea0767cad19e71b3530e4cad9605d0b5e338b6a1e72c37c9c1ceb86c333/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:80406c3065e2c55d7f49a9550fe0c49b3f12e5bfff5dedb727e319e1afb9bf99", size = 4270942, upload-time = "2026-04-08T01:56:56.416Z" },
+    { url = "https://files.pythonhosted.org/packages/41/3d/fe14df95a83319af25717677e956567a105bb6ab25641acaa093db79975d/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:c5b1ccd1239f48b7151a65bc6dd54bcfcc15e028c8ac126d3fada09db0e07ef1", size = 4871079, upload-time = "2026-04-08T01:56:58.31Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/59/4a479e0f36f8f378d397f4eab4c850b4ffb79a2f0d58704b8fa0703ddc11/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d5f7520159cd9c2154eb61eb67548ca05c5774d39e9c2c4339fd793fe7d097b2", size = 4443999, upload-time = "2026-04-08T01:57:00.508Z" },
+    { url = "https://files.pythonhosted.org/packages/28/17/b59a741645822ec6d04732b43c5d35e4ef58be7bfa84a81e5ae6f05a1d33/cryptography-46.0.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fcd8eac50d9138c1d7fc53a653ba60a2bee81a505f9f8850b6b2888555a45d0e", size = 4399191, upload-time = "2026-04-08T01:57:02.654Z" },
+    { url = "https://files.pythonhosted.org/packages/59/6a/bb2e166d6d0e0955f1e9ff70f10ec4b2824c9cfcdb4da772c7dd69cc7d80/cryptography-46.0.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:65814c60f8cc400c63131584e3e1fad01235edba2614b61fbfbfa954082db0ee", size = 4655782, upload-time = "2026-04-08T01:57:04.592Z" },
+    { url = "https://files.pythonhosted.org/packages/95/b6/3da51d48415bcb63b00dc17c2eff3a651b7c4fed484308d0f19b30e8cb2c/cryptography-46.0.7-cp314-cp314t-win32.whl", hash = "sha256:fdd1736fed309b4300346f88f74cd120c27c56852c3838cab416e7a166f67298", size = 3002227, upload-time = "2026-04-08T01:57:06.91Z" },
+    { url = "https://files.pythonhosted.org/packages/32/a8/9f0e4ed57ec9cebe506e58db11ae472972ecb0c659e4d52bbaee80ca340a/cryptography-46.0.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e06acf3c99be55aa3b516397fe42f5855597f430add9c17fa46bf2e0fb34c9bb", size = 3475332, upload-time = "2026-04-08T01:57:08.807Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/7f/cd42fc3614386bc0c12f0cb3c4ae1fc2bbca5c9662dfed031514911d513d/cryptography-46.0.7-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:462ad5cb1c148a22b2e3bcc5ad52504dff325d17daf5df8d88c17dda1f75f2a4", size = 7165618, upload-time = "2026-04-08T01:57:10.645Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/d0/36a49f0262d2319139d2829f773f1b97ef8aef7f97e6e5bd21455e5a8fb5/cryptography-46.0.7-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:84d4cced91f0f159a7ddacad249cc077e63195c36aac40b4150e7a57e84fffe7", size = 4270628, upload-time = "2026-04-08T01:57:12.885Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/6c/1a42450f464dda6ffbe578a911f773e54dd48c10f9895a23a7e88b3e7db5/cryptography-46.0.7-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:128c5edfe5e5938b86b03941e94fac9ee793a94452ad1365c9fc3f4f62216832", size = 4415405, upload-time = "2026-04-08T01:57:14.923Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/92/4ed714dbe93a066dc1f4b4581a464d2d7dbec9046f7c8b7016f5286329e2/cryptography-46.0.7-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5e51be372b26ef4ba3de3c167cd3d1022934bc838ae9eaad7e644986d2a3d163", size = 4272715, upload-time = "2026-04-08T01:57:16.638Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/e6/a26b84096eddd51494bba19111f8fffe976f6a09f132706f8f1bf03f51f7/cryptography-46.0.7-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cdf1a610ef82abb396451862739e3fc93b071c844399e15b90726ef7470eeaf2", size = 4918400, upload-time = "2026-04-08T01:57:19.021Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/08/ffd537b605568a148543ac3c2b239708ae0bd635064bab41359252ef88ed/cryptography-46.0.7-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1d25aee46d0c6f1a501adcddb2d2fee4b979381346a78558ed13e50aa8a59067", size = 4450634, upload-time = "2026-04-08T01:57:21.185Z" },
+    { url = "https://files.pythonhosted.org/packages/16/01/0cd51dd86ab5b9befe0d031e276510491976c3a80e9f6e31810cce46c4ad/cryptography-46.0.7-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:cdfbe22376065ffcf8be74dc9a909f032df19bc58a699456a21712d6e5eabfd0", size = 3985233, upload-time = "2026-04-08T01:57:22.862Z" },
+    { url = "https://files.pythonhosted.org/packages/92/49/819d6ed3a7d9349c2939f81b500a738cb733ab62fbecdbc1e38e83d45e12/cryptography-46.0.7-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:abad9dac36cbf55de6eb49badd4016806b3165d396f64925bf2999bcb67837ba", size = 4271955, upload-time = "2026-04-08T01:57:24.814Z" },
+    { url = "https://files.pythonhosted.org/packages/80/07/ad9b3c56ebb95ed2473d46df0847357e01583f4c52a85754d1a55e29e4d0/cryptography-46.0.7-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:935ce7e3cfdb53e3536119a542b839bb94ec1ad081013e9ab9b7cfd478b05006", size = 4879888, upload-time = "2026-04-08T01:57:26.88Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/c7/201d3d58f30c4c2bdbe9b03844c291feb77c20511cc3586daf7edc12a47b/cryptography-46.0.7-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:35719dc79d4730d30f1c2b6474bd6acda36ae2dfae1e3c16f2051f215df33ce0", size = 4449961, upload-time = "2026-04-08T01:57:29.068Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/ef/649750cbf96f3033c3c976e112265c33906f8e462291a33d77f90356548c/cryptography-46.0.7-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7bbc6ccf49d05ac8f7d7b5e2e2c33830d4fe2061def88210a126d130d7f71a85", size = 4401696, upload-time = "2026-04-08T01:57:31.029Z" },
+    { url = "https://files.pythonhosted.org/packages/41/52/a8908dcb1a389a459a29008c29966c1d552588d4ae6d43f3a1a4512e0ebe/cryptography-46.0.7-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a1529d614f44b863a7b480c6d000fe93b59acee9c82ffa027cfadc77521a9f5e", size = 4664256, upload-time = "2026-04-08T01:57:33.144Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/fa/f0ab06238e899cc3fb332623f337a7364f36f4bb3f2534c2bb95a35b132c/cryptography-46.0.7-cp38-abi3-win32.whl", hash = "sha256:f247c8c1a1fb45e12586afbb436ef21ff1e80670b2861a90353d9b025583d246", size = 3013001, upload-time = "2026-04-08T01:57:34.933Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/f1/00ce3bde3ca542d1acd8f8cfa38e446840945aa6363f9b74746394b14127/cryptography-46.0.7-cp38-abi3-win_amd64.whl", hash = "sha256:506c4ff91eff4f82bdac7633318a526b1d1309fc07ca76a3ad182cb5b686d6d3", size = 3472985, upload-time = "2026-04-08T01:57:36.714Z" },
+    { url = "https://files.pythonhosted.org/packages/63/0c/dca8abb64e7ca4f6b2978769f6fea5ad06686a190cec381f0a796fdcaaba/cryptography-46.0.7-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:fc9ab8856ae6cf7c9358430e49b368f3108f050031442eaeb6b9d87e4dcf4e4f", size = 3476879, upload-time = "2026-04-08T01:57:38.664Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/ea/075aac6a84b7c271578d81a2f9968acb6e273002408729f2ddff517fed4a/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d3b99c535a9de0adced13d159c5a9cf65c325601aa30f4be08afd680643e9c15", size = 4219700, upload-time = "2026-04-08T01:57:40.625Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/7b/1c55db7242b5e5612b29fc7a630e91ee7a6e3c8e7bf5406d22e206875fbd/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d02c738dacda7dc2a74d1b2b3177042009d5cab7c7079db74afc19e56ca1b455", size = 4385982, upload-time = "2026-04-08T01:57:42.725Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/da/9870eec4b69c63ef5925bf7d8342b7e13bc2ee3d47791461c4e49ca212f4/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:04959522f938493042d595a736e7dbdff6eb6cc2339c11465b3ff89343b65f65", size = 4219115, upload-time = "2026-04-08T01:57:44.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/72/05aa5832b82dd341969e9a734d1812a6aadb088d9eb6f0430fc337cc5a8f/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:3986ac1dee6def53797289999eabe84798ad7817f3e97779b5061a95b0ee4968", size = 4385479, upload-time = "2026-04-08T01:57:46.86Z" },
+    { url = "https://files.pythonhosted.org/packages/20/2a/1b016902351a523aa2bd446b50a5bc1175d7a7d1cf90fe2ef904f9b84ebc/cryptography-46.0.7-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:258514877e15963bd43b558917bc9f54cf7cf866c38aa576ebf47a77ddbc43a4", size = 3412829, upload-time = "2026-04-08T01:57:48.874Z" },
 ]
 
 [[package]]
@@ -1573,6 +1573,7 @@ version = "0.13.0"
 source = { editable = "." }
 dependencies = [
     { name = "croniter" },
+    { name = "cryptography" },
     { name = "fire" },
     { name = "httpx", extra = ["socks"] },
     { name = "jinja2" },
@@ -1758,18 +1759,19 @@ youtube = [
 [package.metadata]
 requires-dist = [
     { name = "agent-client-protocol", marker = "extra == 'acp'", specifier = "==0.9.0" },
-    { name = "aiohttp", marker = "extra == 'homeassistant'", specifier = "==3.13.3" },
-    { name = "aiohttp", marker = "extra == 'messaging'", specifier = "==3.13.3" },
-    { name = "aiohttp", marker = "extra == 'slack'", specifier = "==3.13.3" },
-    { name = "aiohttp", marker = "extra == 'sms'", specifier = "==3.13.3" },
+    { name = "aiohttp", marker = "extra == 'homeassistant'", specifier = "==3.13.4" },
+    { name = "aiohttp", marker = "extra == 'messaging'", specifier = "==3.13.4" },
+    { name = "aiohttp", marker = "extra == 'slack'", specifier = "==3.13.4" },
+    { name = "aiohttp", marker = "extra == 'sms'", specifier = "==3.13.4" },
     { name = "aiohttp-socks", marker = "extra == 'matrix'", specifier = "==0.11.0" },
     { name = "aiosqlite", marker = "extra == 'matrix'", specifier = "==0.22.1" },
     { name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = "==2.2.42" },
-    { name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.86.0" },
+    { name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.87.0" },
     { name = "asyncpg", marker = "extra == 'matrix'", specifier = "==0.31.0" },
     { name = "boto3", marker = "extra == 'bedrock'", specifier = "==1.42.89" },
     { name = "brotlicffi", marker = "extra == 'messaging'", specifier = "==1.2.0.1" },
     { name = "croniter", specifier = "==6.0.0" },
+    { name = "cryptography", specifier = "==46.0.7" },
     { name = "daytona", marker = "extra == 'daytona'", specifier = "==0.155.0" },
     { name = "debugpy", marker = "extra == 'dev'", specifier = "==1.8.20" },
     { name = "dingtalk-stream", marker = "extra == 'dingtalk'", specifier = "==0.24.3" },

From 5f91b1a48b06c8260dc539614abda27cf4e831cb Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 01:55:06 -0700
Subject: [PATCH 121/218] feat(skills): add osint-investigation optional skill
 (closes #355) (#26729)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(skills): add osint-investigation optional skill (closes #355)

Phase-1 public-records OSINT investigation framework adapted from
ShinMegamiBoson/OpenPlanter (MIT). Lives in optional-skills/research/.

Six data-source wiki entries (FEC, SEC EDGAR, USAspending, Senate LD,
OFAC SDN, ICIJ Offshore Leaks), each following the 9-section template:
summary, access, schema, coverage, cross-reference keys, data quality,
acquisition, legal, references.

Six stdlib-only acquisition scripts that emit normalized CSV, plus three
analysis scripts:

  - entity_resolution.py  — three-tier match (exact / fuzzy / token overlap)
                            with explicit confidence per row
  - timing_analysis.py    — permutation test for donation/contract timing
                            correlation, joins through cross-links
  - build_findings.py     — assembles structured findings.json with
                            evidence chains pointing back to source rows

Validation: full pipeline runs end-to-end on synthetic fixtures. Entity
resolution found 24 cross-matches with 0 false positives on a 5-row /
4-row test set. Timing analysis on 5 donations clustered near 3 awards
returned p=0.000, effect size 2.41 SD. Findings JSON correctly tags
HIGH-severity timing pattern. All 9 scripts pass --help and py_compile.

Docs site page auto-generated by website/scripts/generate-skill-docs.py;
sidebar + catalog entries updated by the same generator.

* fix(osint-investigation): live API fixes from end-to-end sweep

Live-tested the skill on a real public-citizen query and found three bugs
the synthetic E2E missed. All three are now fixed and re-verified.

1. FEC fetch hung on contributor name searches.
   The combination of two_year_transaction_period + sort=date +
   contributor_name puts the OpenFEC query plan on a slow path that the
   upstream gateway times out (25s+). Switched to min_date/max_date with no
   explicit sort. Renamed --candidate to --contributor (the original name
   was misleading: FEC searches by donor, not by candidate; --candidate is
   kept as a deprecated alias). Added --state filter for narrowing.

2. ICIJ Offshore Leaks reconcile endpoint returns 404.
   ICIJ removed the Open Refine reconciliation API. Rewrote
   fetch_icij_offshore.py to download the official bulk CSV ZIP (~70 MB,
   public, no auth) and search it locally. Cached under
   $HERMES_OSINT_CACHE/icij/ (default ~/.cache/hermes-osint/icij/) for
   30 days, --force-refresh to refetch. Verified live: 'PUTIN' query
   returns 5 Panama Papers officer matches in 0.5s after first download.

3. SEC EDGAR silently returned 0 when the company-name resolver matched
   an individual Form 3/4/5 filer (insider trading disclosures).
   Now surfaces 'Resolved company X → CIK Y (Z)' on stderr, prints a
   filing-type histogram when the type filter wipes results, and
   explicitly warns when the matched CIK appears to be an individual
   filer rather than a corporate registrant.

Bonus: _http.py was retrying 429 responses with exponential backoff plus
honoring (often-missing) Retry-After headers, which compounded into
multi-second hangs per page when the upstream key was over quota.
Changed to fail-fast on 429 with a clear, actionable error showing the
upstream's quota message. Verified: 0.3s fast-fail vs the previous 60s
hang on DEMO_KEY rate-limit exhaustion.

Updated SKILL.md, fec.md, and icij-offshore.md to match the new CLI
flags and ICIJ bulk-cache flow. Regenerated the docusaurus page via
website/scripts/generate-skill-docs.py.

Live sweep results across all 6 sources for 'Dillon Rolnick, New York':
- OFAC SDN: 0 matches ✓ (correctly not sanctioned)
- USAspending: 0 matches ✓ (correctly not a federal contractor)
- Senate LDA: 0 matches ✓ (correctly not a lobbying client)
- SEC EDGAR: warns it resolved to 'Rolnick Michael' (CIK 0001845264)
    who is an individual Form 3 filer, not a corporate registrant
- ICIJ: 0 matches ✓ (correctly not in any offshore leak)
- FEC: rate-limited (DEMO_KEY); fails fast with clear quota message

* feat(osint-investigation): expand to 12 sources covering identity, property, courts, archives, news

Phase-2 expansion per Teknium feedback that the original 6-source skill
(federal financial/regulatory only) wasn't a complete OSINT toolkit. Adds
6 more sources covering the major omissions a real investigation would
reach for first.

New sources (6 fetch scripts + 6 wiki entries):

1. NYC ACRIS — Real property records (deeds, mortgages, liens) via the
   city's Socrata API. Search by party name or property address. Joins
   Parties to Master to populate doc_type, dates, borough, and amount.
   Coverage: 5 NYC boroughs, ~70M party records, 1966-present.

2. OpenCorporates — Global corporate registry covering 130+ jurisdictions
   (~200M companies). Free API token at
   https://opencorporates.com/api_accounts/new raises the rate limit;
   HTML fallback works without one (limited fields).

3. CourtListener (Free Law Project) — federal + state court opinions
   (~10M back to colonial era) + PACER dockets via RECAP. Anonymous v4
   search works; COURTLISTENER_TOKEN raises rate limits.

4. Wayback Machine CDX — historical web captures (~900B+). Used both for
   surveillance-of-record (when did this site change?) and as a
   content-recovery layer when other sources point to dead URLs.

5. Wikipedia + Wikidata — narrative bio + structured facts. Wikipedia
   OpenSearch for article matching, REST summary for extracts, Wikidata
   Action API (wbgetentities) for claims. Avoids the SPARQL Query
   Service which is aggressively rate-limited.

6. GDELT 2.0 DOC API — global news monitoring in 100+ languages,
   ~2015-present. Auto-retries with 6s backoff on the standard
   1-req-per-5-sec throttle.

Other changes in this commit:

- SEC EDGAR no longer raises SystemExit when the company-name resolver
  finds no CIK; writes an empty CSV with header so the rest of a
  pipeline can keep moving and the warning is just on stderr.

- _http.py User-Agent updated per Wikimedia policy: includes app name,
  version, and a 'set HERMES_OSINT_UA to identify yourself' instruction.

- SKILL.md workflow now groups sources into two clusters (federal
  financial vs identity/property/courts/archives/news) with bash
  examples for each. 'When to use this skill' lists the broader set of
  investigation patterns the expanded sources unlock.

Live sweep results on 'Dillon Rolnick, New York' across all 12 sources:

  ofac           ✓ 0 (correctly clean)
  icij           ✓ 0 (correctly not in any leak)
  usaspending    ✓ 0 (correctly not a federal contractor)
  senate_lda     ✓ 0 (correctly not a lobbying client)
  sec_edgar      ✓ 0, warns: resolved to 'Rolnick Michael' (CIK 0001845264),
                   individual Form 3 filer, NOT a corporate registrant
  fec            — rate-limited (DEMO_KEY exhausted), fails fast with
                   clear quota message
  nyc_acris      ✓ 200 records named Rolnick across NYC; 48 records at
                   571 Hudson (the property the web identifies as his)
  opencorporates ✓ 0 (no API token configured; HTML fallback)
  courtlistener  ✓ 0 for 'Dillon Rolnick'; 20 for 'Rolnick' generally;
                   5 for 'Microsoft' sanity check
  wayback        ✓ 30 captures of nousresearch.com from 2011-present
  wikipedia      ✓ 0 (correctly not notable enough); Bill Gates sanity
                   returns full structured facts (occupation, employer,
                   DOB, place of birth, country)
  gdelt          ✓ 0 for 'Dillon Rolnick'; 5 for 'Nous Research'

All 17 scripts compile clean and pass --help. Synthetic analysis pipeline
regression still passes (entity_resolution 30 matches, timing p=0.000,
findings 2).

* feat(osint-investigation): remove FEC; DEMO_KEY rate-limits make it unreliable

The FEC fetcher consistently failed the live sweep because the OpenFEC
DEMO_KEY tier (40 calls/hour) exhausts on a single investigation, and
the upstream returns slow-path query plans for unindexed contributor-name
searches that the gateway times out. Without a real API key it's not
usable; with one the user has to sign up at api.data.gov first. That's
too much setup friction for a skill that should work out of the box.

Removed:
  - scripts/fetch_fec.py
  - references/sources/fec.md

Updated:
  - SKILL.md frontmatter description + tags
  - 'When NOT to use' now points users at https://www.fec.gov/data/ for
    federal donations
  - entity_resolution example switched from donor↔contractor to
    lobbying-client↔contractor (Senate LDA + USAspending pair)
  - timing_analysis example switched to lobbying-filings vs awards
  - 8 wiki entries had their 'FEC ↔ ...' cross-reference bullets removed

11 sources remain (5 federal financial + 6 identity/property/courts/
archives/news). All scripts compile, pass --help, and the synthetic
analysis pipeline still passes on the new lobbying-shaped regression
fixture (30 matches, p=0.000 on tight clustering, 2 findings).
---
 .../research/osint-investigation/SKILL.md     | 277 +++++++++++++++++
 .../references/sources/courtlistener.md       |  98 ++++++
 .../references/sources/gdelt.md               | 104 +++++++
 .../references/sources/icij-offshore.md       | 104 +++++++
 .../references/sources/nyc-acris.md           |  90 ++++++
 .../references/sources/ofac-sdn.md            |  92 ++++++
 .../references/sources/opencorporates.md      | 103 ++++++
 .../references/sources/sec-edgar.md           |  83 +++++
 .../references/sources/senate-ld.md           |  89 ++++++
 .../references/sources/usaspending.md         |  97 ++++++
 .../references/sources/wayback.md             |  93 ++++++
 .../references/sources/wikipedia.md           | 107 +++++++
 .../osint-investigation/scripts/_http.py      |  82 +++++
 .../osint-investigation/scripts/_normalize.py |  67 ++++
 .../scripts/build_findings.py                 | 221 +++++++++++++
 .../scripts/entity_resolution.py              | 228 ++++++++++++++
 .../scripts/fetch_courtlistener.py            | 149 +++++++++
 .../scripts/fetch_gdelt.py                    | 162 ++++++++++
 .../scripts/fetch_icij_offshore.py            | 234 ++++++++++++++
 .../scripts/fetch_nyc_acris.py                | 203 ++++++++++++
 .../scripts/fetch_ofac_sdn.py                 | 175 +++++++++++
 .../scripts/fetch_opencorporates.py           | 192 ++++++++++++
 .../scripts/fetch_sec_edgar.py                | 184 +++++++++++
 .../scripts/fetch_senate_ld.py                | 146 +++++++++
 .../scripts/fetch_usaspending.py              | 170 ++++++++++
 .../scripts/fetch_wayback.py                  | 142 +++++++++
 .../scripts/fetch_wikipedia.py                | 267 ++++++++++++++++
 .../scripts/timing_analysis.py                | 253 +++++++++++++++
 .../templates/source-template.md              |  59 ++++
 .../docs/reference/optional-skills-catalog.md |   1 +
 .../research/research-osint-investigation.md  | 294 ++++++++++++++++++
 website/sidebars.ts                           |   1 +
 32 files changed, 4567 insertions(+)
 create mode 100644 optional-skills/research/osint-investigation/SKILL.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/courtlistener.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/gdelt.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/icij-offshore.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/nyc-acris.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/ofac-sdn.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/opencorporates.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/sec-edgar.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/senate-ld.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/usaspending.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/wayback.md
 create mode 100644 optional-skills/research/osint-investigation/references/sources/wikipedia.md
 create mode 100644 optional-skills/research/osint-investigation/scripts/_http.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/_normalize.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/build_findings.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/entity_resolution.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_courtlistener.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_gdelt.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_icij_offshore.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_nyc_acris.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_ofac_sdn.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_sec_edgar.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_senate_ld.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_usaspending.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_wayback.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py
 create mode 100644 optional-skills/research/osint-investigation/scripts/timing_analysis.py
 create mode 100644 optional-skills/research/osint-investigation/templates/source-template.md
 create mode 100644 website/docs/user-guide/skills/optional/research/research-osint-investigation.md

diff --git a/optional-skills/research/osint-investigation/SKILL.md b/optional-skills/research/osint-investigation/SKILL.md
new file mode 100644
index 00000000000..b2da82fbd00
--- /dev/null
+++ b/optional-skills/research/osint-investigation/SKILL.md
@@ -0,0 +1,277 @@
+---
+name: osint-investigation
+description: Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property records (ACRIS), OpenCorporates registries, CourtListener court records, Wayback Machine archives, Wikipedia + Wikidata, GDELT news monitoring. Entity resolution across sources, cross-link analysis, timing correlation, evidence chains. Python stdlib only.
+version: 0.1.0
+platforms: [linux, macos, windows]
+author: Hermes Agent (adapted from ShinMegamiBoson/OpenPlanter, MIT)
+metadata:
+  hermes:
+    tags: [osint, investigation, public-records, sec, sanctions, corporate-registry, property, courts, due-diligence, journalism]
+    category: research
+    related_skills: [domain-intel, arxiv]
+---
+
+# OSINT Investigation — Public Records Cross-Reference
+
+Investigative framework for public-records OSINT: government contracts,
+corporate filings, lobbying, sanctions, offshore leaks, property records,
+court records, web archives, knowledge bases, and global news. Resolve
+entities across heterogeneous sources, build cross-links with explicit
+confidence, run statistical timing tests, and produce structured evidence
+chains.
+
+**Python stdlib only.** Zero install. Works on Linux, macOS, Windows. Most
+sources work with no API key (OpenCorporates has an optional free token
+that raises rate limits).
+
+Adapted from the MIT-licensed ShinMegamiBoson/OpenPlanter project; expanded
+to cover identity / property / litigation / archives / news sources that
+the original didn't address.
+
+## When to use this skill
+
+Use when the user asks for:
+
+- "follow the money" — government contracts, lobbying → legislation, sanctions
+- corporate due diligence — who controls company X, where are they
+  incorporated, who serves on their boards, what filings have they made
+- sanctions screening — is entity X on OFAC SDN, ICIJ offshore leaks
+- pay-to-play investigation — contractors with offshore ties, lobbying
+  clients winning awards
+- property ownership — find recorded deeds/mortgages by name or address
+  (NYC; for other counties point users at the relevant recorder)
+- litigation history — find federal + state court opinions and PACER dockets
+- multi-source entity resolution where naming varies (LLC suffixes, abbreviations)
+- evidence-chain construction with explicit confidence levels
+- "what's been said about X" — international news (GDELT) + Wikipedia
+  narrative + Wayback Machine to recover dead URLs
+
+Do NOT use this skill for:
+
+- general web research → `web_search` / `web_extract`
+- domain/infrastructure OSINT → `domain-intel` skill
+- academic literature → `arxiv` skill
+- social-media profile discovery → `sherlock` skill (optional)
+- US **federal** campaign finance — FEC is intentionally NOT covered here
+  (the API is unreliable for ad-hoc contributor-name queries on the free
+  DEMO_KEY tier). For federal donations, point users at
+  https://www.fec.gov/data/ directly.
+
+## Workflow
+
+The agent runs scripts via the `terminal` tool. `SKILL_DIR` is the directory
+holding this SKILL.md.
+
+### 1. Identify which sources apply
+
+Read the data-source wiki entries to plan the investigation:
+
+```
+ls SKILL_DIR/references/sources/
+
+# Federal financial / regulatory
+cat SKILL_DIR/references/sources/sec-edgar.md       # corporate filings
+cat SKILL_DIR/references/sources/usaspending.md     # federal contracts
+cat SKILL_DIR/references/sources/senate-ld.md       # lobbying
+cat SKILL_DIR/references/sources/ofac-sdn.md        # sanctions
+cat SKILL_DIR/references/sources/icij-offshore.md   # offshore leaks
+
+# Identity / property / litigation / archives / news
+cat SKILL_DIR/references/sources/nyc-acris.md       # NYC property records
+cat SKILL_DIR/references/sources/opencorporates.md  # global corporate registry
+cat SKILL_DIR/references/sources/courtlistener.md   # court records (federal + state)
+cat SKILL_DIR/references/sources/wayback.md         # Wayback Machine archives
+cat SKILL_DIR/references/sources/wikipedia.md       # Wikipedia + Wikidata
+cat SKILL_DIR/references/sources/gdelt.md           # global news monitoring
+```
+
+Each entry follows a 9-section template: summary, access, schema, coverage,
+cross-reference keys, data quality, acquisition, legal, references.
+
+The **cross-reference potential** section maps join keys between sources — read
+those first to pick the right pair.
+
+### 2. Acquire data
+
+Each source has a stdlib-only fetch script in `SKILL_DIR/scripts/`:
+
+**Federal financial / regulatory**
+
+```bash
+# SEC EDGAR filings (corporate disclosures)
+python3 SKILL_DIR/scripts/fetch_sec_edgar.py --cik 0000320193 \
+    --types 10-K,10-Q --out data/edgar_filings.csv
+
+# USAspending federal contracts
+python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
+    --fy 2024 --out data/contracts.csv
+
+# Senate LD-1 / LD-2 lobbying disclosures
+python3 SKILL_DIR/scripts/fetch_senate_ld.py --client "EXAMPLE CORP" \
+    --year 2024 --out data/lobbying.csv
+
+# OFAC SDN sanctions list (full snapshot)
+python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --out data/ofac_sdn.csv
+
+# ICIJ Offshore Leaks — downloads ~70 MB bulk CSV on first use,
+# then searches it locally. Cached for 30 days under
+# $HERMES_OSINT_CACHE/icij/ (default: ~/.cache/hermes-osint/icij/).
+python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
+    --out data/icij.csv
+```
+
+**Identity / property / litigation / archives / news**
+
+```bash
+# NYC property records (deeds, mortgages, liens) — ACRIS via Socrata
+python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "SMITH, JOHN" \
+    --out data/acris.csv
+python3 SKILL_DIR/scripts/fetch_nyc_acris.py --address "571 HUDSON" \
+    --out data/acris_addr.csv
+
+# OpenCorporates — 130+ jurisdiction corporate registry
+# (free token required; set OPENCORPORATES_API_TOKEN or pass --token)
+python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
+    --jurisdiction us_ny --out data/opencorporates.csv
+
+# CourtListener — federal + state court opinions, PACER dockets
+python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Smith v. Example Corp" \
+    --type opinions --out data/courts.csv
+
+# Wayback Machine — historical web captures
+python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
+    --match host --collapse digest --out data/wayback.csv
+
+# Wikipedia + Wikidata — narrative bio + structured facts
+# Set HERMES_OSINT_UA=your-app/1.0 (your@email) to identify yourself
+python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Bill Gates" \
+    --out data/wp.csv
+
+# GDELT — global news in 100+ languages, ~2015→present
+python3 SKILL_DIR/scripts/fetch_gdelt.py --query '"Example Corp"' \
+    --timespan 1y --out data/gdelt.csv
+```
+
+All outputs are normalized CSV with a header row. Re-run scripts idempotently.
+
+When a private individual won't be in a source (e.g. SEC EDGAR for a non-public-
+company person, USAspending for someone who isn't a federal contractor, Senate
+LDA for someone who isn't a lobbying client), the script returns 0 rows with a
+clear warning rather than silently writing an empty CSV. EDGAR specifically
+flags when the company-name resolver matched an individual Form 3/4/5 filer
+rather than a corporate registrant.
+
+Rate-limit notes are in each source's wiki entry. Default fetchers sleep
+politely between paginated requests. **API keys raise rate limits** for
+sources that support them (`SEC_USER_AGENT`, `SENATE_LDA_TOKEN`,
+`OPENCORPORATES_API_TOKEN`, `COURTLISTENER_TOKEN`). All scripts surface
+429 responses immediately with the upstream's quota message so the user
+knows to slow down or supply a key.
+
+### 3. Resolve entities across sources
+
+Normalize names and find matches between two CSV files:
+
+```bash
+# Match lobbying clients (Senate LDA) against contract recipients (USAspending)
+python3 SKILL_DIR/scripts/entity_resolution.py \
+    --left  data/lobbying.csv   --left-name-col  client_name \
+    --right data/contracts.csv  --right-name-col recipient_name \
+    --out data/cross_links.csv
+```
+
+Three matching tiers with explicit confidence:
+
+| Tier | Method | Confidence |
+|------|--------|------------|
+| `exact` | Normalized strings equal after suffix/punctuation strip | high |
+| `fuzzy` | Sorted-token equality (word-bag match) | medium |
+| `token_overlap` | ≥60% token overlap, ≥2 shared tokens, tokens ≥4 chars | low |
+
+Output `cross_links.csv` columns: `match_type, confidence, left_name,
+right_name, left_normalized, right_normalized, left_row, right_row`.
+
+### 4. Statistical timing correlation (optional)
+
+Test whether two time series cluster suspiciously close together — e.g.
+lobbying filings near contract awards — using a permutation test:
+
+```bash
+python3 SKILL_DIR/scripts/timing_analysis.py \
+    --donations data/lobbying.csv --donation-date-col filing_date \
+        --donation-amount-col income --donation-donor-col client_name \
+        --donation-recipient-col registrant_name \
+    --contracts data/contracts.csv --contract-date-col award_date \
+        --contract-vendor-col recipient_name \
+    --cross-links data/cross_links.csv \
+    --permutations 1000 \
+    --out data/timing.json
+```
+
+The script's column flags are intentionally generic — the original tool was
+written for donations vs awards, but it works for any (event, payee) time
+series joined through cross-links. Null hypothesis: event timing is
+independent of award dates. One-tailed p-value = fraction of permutations
+with mean nearest-award distance ≤ observed. Minimum 3 events per (payer,
+vendor) pair to run the test.
+
+### 5. Build the findings JSON (evidence chain)
+
+```bash
+python3 SKILL_DIR/scripts/build_findings.py \
+    --cross-links data/cross_links.csv \
+    --timing data/timing.json \
+    --out data/findings.json
+```
+
+Every finding has `id, title, severity, confidence, summary, evidence[], sources[]`.
+Each evidence item points back to a specific row in a source CSV. The user (or a
+follow-up agent) can verify every claim against its source.
+
+## Confidence and evidence discipline
+
+This is the load-bearing rule of the skill. Tell the user:
+
+- Every claim must trace to a record. No naked assertions.
+- Confidence tier travels with the claim. `match_type=fuzzy` is "probable",
+  not "confirmed."
+- Entity resolution produces candidates, NOT conclusions. A `fuzzy` match
+  between "ACME LLC" and "Acme Holdings Group" is a lead, not a fact.
+- Statistical significance ≠ wrongdoing. p < 0.05 means the timing pattern
+  is unlikely under the null. It does not establish corruption.
+- All data sources here are public records. They may still contain
+  inaccuracies, stale info, or redactions (GDPR, sealed records).
+
+## Adding a new data source
+
+Use the template:
+
+```bash
+cp SKILL_DIR/templates/source-template.md \
+    SKILL_DIR/references/sources/<your-source>.md
+```
+
+Fill in all 9 sections. Write a `fetch_<source>.py` script in `scripts/` that
+uses stdlib only and writes a normalized CSV. Update the source list in the
+"When to use" section above.
+
+## Tools and their limits
+
+- `entity_resolution.py` does NOT use external fuzzy libraries (no rapidfuzz,
+  no jellyfish). Token-bag matching is the upper bound here. If you need
+  Levenshtein, transliteration, or phonetic matching, pip-install separately.
+- `timing_analysis.py` uses Python's `random` for permutations. For
+  reproducibility, pass `--seed N`.
+- `fetch_*.py` scripts use `urllib.request` and respect `Retry-After`. Heavy
+  bulk usage may still violate ToS — read each source's legal section first.
+
+## Legal note
+
+All Phase-1 sources are public records. Bulk acquisition is permitted under
+their respective access terms (FOIA, public records law, ICIJ explicit
+publication, OFAC public data). However:
+
+- Some sources rate-limit aggressively. Respect their headers.
+- Some redact registrant info (GDPR on WHOIS, sealed filings).
+- Cross-referencing public records to identify private individuals can have
+  ethical implications. The skill produces evidence chains, not accusations.
diff --git a/optional-skills/research/osint-investigation/references/sources/courtlistener.md b/optional-skills/research/osint-investigation/references/sources/courtlistener.md
new file mode 100644
index 00000000000..0365b2ba0b1
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/courtlistener.md
@@ -0,0 +1,98 @@
+# CourtListener — Free Law Project
+
+## 1. Summary
+
+CourtListener (Free Law Project) aggregates court opinions, dockets, oral
+arguments, and judge data. Covers ~10M federal and state court opinions
+back to colonial America, plus PACER docket data from RECAP submissions.
+
+## 2. Access Methods
+
+- **REST API v4:** `https://www.courtlistener.com/api/rest/v4/`
+- **Auth:** Anonymous reads allowed on most endpoints; token raises rate
+  limits and unlocks bulk export
+- **Rate limit:** ~5,000 req/hour unauthenticated for search; higher with token
+
+Set `COURTLISTENER_TOKEN` env var. Get a free token at
+https://www.courtlistener.com/sign-in/ then create an API key.
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_courtlistener.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `case_name` | str | Case name |
+| `court` | str | Court name |
+| `court_id` | str | Court ID (e.g. `nysd`, `scotus`, `ca9`) |
+| `date_filed` | str | YYYY-MM-DD |
+| `docket_number` | str | Court docket number |
+| `judge` | str | Judge name(s) |
+| `citation` | str | Reporter citation(s) |
+| `result_type` | str | opinions / dockets / oral / people |
+| `snippet` | str | Search-match snippet (up to 500 chars) |
+| `absolute_url` | str | Direct CourtListener URL |
+
+## 4. Coverage
+
+- Federal: all circuit and district courts, SCOTUS
+- State: all 50 state supreme/appellate courts, many trial courts
+- Opinions: ~10M back to 1600s (colonial), full coverage 1950 → present
+- Dockets via RECAP: ~3M+ from user-submitted PACER PDFs
+- Updated continuously
+
+## 5. Cross-Reference Potential
+
+- **OpenCorporates** ↔ `case_name` (corporate litigation)
+- **SEC EDGAR** ↔ `case_name` (securities class actions)
+- **OFAC SDN** ↔ `case_name` (sanctions-related civil/criminal cases)
+
+Join key: party name from `case_name`. Note: `case_name` often abbreviates
+("Smith v. Jones" rather than full party names) — use the full case URL
+to get all parties.
+
+## 6. Data Quality
+
+- Older opinions (pre-1990) often lack docket numbers and judges
+- State coverage is more uneven than federal
+- PACER docket coverage depends on RECAP user submissions — not exhaustive
+- Sealed documents are excluded
+- Party names in case captions don't always match filing names exactly
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_courtlistener.py`
+
+```bash
+# Search opinions for a party / keyword
+python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Example Corp" \
+    --out data/cl.csv
+
+# PACER dockets (best for recent litigation)
+python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Example Corp" \
+    --type dockets --out data/cl_dockets.csv
+
+# Restrict to a court
+python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Microsoft" \
+    --court ca9 --out data/cl_9th.csv
+
+# Date range
+python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Example Corp" \
+    --date-from 2020-01-01 --date-to 2024-12-31 --out data/cl.csv
+```
+
+Pass `--token` or set `COURTLISTENER_TOKEN`.
+
+## 8. Legal & Licensing
+
+- Court opinions are public domain
+- Free Law Project provides the data under CC0 / public domain dedication
+- No commercial use restrictions on opinion text or metadata
+- Some PACER PDFs have copyright on layout (not text) — fair use applies
+
+## 9. References
+
+- API docs: https://www.courtlistener.com/help/api/rest/
+- Court IDs: https://www.courtlistener.com/api/jurisdictions/
+- RECAP archive: https://www.courtlistener.com/recap/
+- Bulk data: https://www.courtlistener.com/help/api/bulk-data/
diff --git a/optional-skills/research/osint-investigation/references/sources/gdelt.md b/optional-skills/research/osint-investigation/references/sources/gdelt.md
new file mode 100644
index 00000000000..785c171a0c9
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/gdelt.md
@@ -0,0 +1,104 @@
+# GDELT — Global News Monitoring
+
+## 1. Summary
+
+GDELT (Global Database of Events, Language, and Tone) monitors world news
+in 100+ languages with full-text indexing. Updated every 15 minutes.
+~2015 → present, ~1B+ articles indexed. Free anonymous access.
+
+GDELT is wider than Google News (more international, more long-tail
+sources) and indexed by tone/sentiment, themes (CAMEO codes), people, and
+organizations.
+
+## 2. Access Methods
+
+- **DOC 2.0 API:** `https://api.gdeltproject.org/api/v2/doc/doc`
+- **Events / GKG 2.0:** `https://api.gdeltproject.org/api/v2/events/events`
+- **Auth:** None
+- **Rate limit:** **1 request per 5 seconds** for the DOC API — strict
+
+The fetch script automatically retries after a 6-second sleep when a
+429 is received.
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_gdelt.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `title` | str | Article title |
+| `url` | str | Article URL |
+| `seen_date` | str | When GDELT first saw the article (UTC) |
+| `domain` | str | Publisher domain |
+| `language` | str | Source language |
+| `source_country` | str | 2-letter country code |
+| `tone` | str | GDELT-computed tone score (negative = negative coverage) |
+| `social_image` | str | Open Graph image URL when available |
+
+## 4. Coverage
+
+- Worldwide news in 100+ languages
+- ~2015 → present (Events back to 1979 via a separate stream)
+- Update frequency: 15 minutes
+- Bias: heavily Anglophone in volume but very wide source list overall
+
+## 5. Cross-Reference Potential
+
+- **All sources** ↔ `title` / `url` (news context for any subject)
+- **Wikipedia** ↔ event timeline for notable entities
+- **Wayback Machine** ↔ recover articles whose URLs have died
+- **OFAC SDN** ↔ news context for sanctions designations
+- **SEC EDGAR** ↔ news context for 8-K material events
+
+Join key: entity name appearing in article title or full-text. GDELT also
+extracts named entities into a separate stream (GKG) not exposed by this
+fetcher — query GDELT directly for entity-level filtering.
+
+## 6. Data Quality
+
+- Title extraction is automated and can be wrong (sometimes captures the
+  site name + delimiter + article title; sometimes a generic page title)
+- Sentiment / tone is computed by GDELT, not source-supplied
+- Some domains are oversampled (newswires, aggregators)
+- Source country is inferred from domain registration / TLD — can be
+  wrong for international news sites with country-neutral domains
+- Article URLs can rot — pair with Wayback Machine to preserve content
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_gdelt.py`
+
+```bash
+# Recent news mentioning an entity
+python3 SKILL_DIR/scripts/fetch_gdelt.py --query "Nous Research" \
+    --timespan 6m --out data/gdelt.csv
+
+# Phrase-exact (use double quotes inside single quotes for the shell)
+python3 SKILL_DIR/scripts/fetch_gdelt.py --query '"Dillon Rolnick"' \
+    --timespan 1y --out data/gdelt.csv
+
+# Filter to a country / language
+python3 SKILL_DIR/scripts/fetch_gdelt.py --query "Microsoft" \
+    --source-country US --source-lang English --out data/gdelt.csv
+
+# Date range
+python3 SKILL_DIR/scripts/fetch_gdelt.py --query "Microsoft" \
+    --start 2024-01-01 --end 2024-12-31 --out data/gdelt.csv
+```
+
+GDELT supports its own query operators: phrase quoting, AND/OR/NOT,
+`sourcecountry:US`, `theme:ECON_BANKRUPTCY`, `tone<-5`, etc.
+See https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/ for syntax.
+
+## 8. Legal & Licensing
+
+- GDELT data is provided free for academic and journalistic use
+- Article URLs link out to original publishers — copyright remains with
+  the publisher
+- GDELT is NOT a content archive; it's a metadata index
+
+## 9. References
+
+- DOC 2.0 API: https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/
+- Themes & query syntax: https://blog.gdeltproject.org/gkg-2-0-our-global-knowledge-graph-2-0-amazing-data-at-your-fingertips/
+- Project home: https://www.gdeltproject.org/
diff --git a/optional-skills/research/osint-investigation/references/sources/icij-offshore.md b/optional-skills/research/osint-investigation/references/sources/icij-offshore.md
new file mode 100644
index 00000000000..99e2abcb24b
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/icij-offshore.md
@@ -0,0 +1,104 @@
+# ICIJ Offshore Leaks Database
+
+## 1. Summary
+
+The International Consortium of Investigative Journalists (ICIJ) publishes a
+combined database of offshore entities from the Panama Papers, Paradise Papers,
+Pandora Papers, Bahamas Leaks, and Offshore Leaks. ~800,000+ offshore entities
+with their officers, intermediaries, and addresses.
+
+## 2. Access Methods
+
+- **Bulk download (primary):** `https://offshoreleaks-data.icij.org/offshoreleaks/csv/full-oldb.LATEST.zip` (~70 MB ZIP, refreshed periodically)
+- **Search UI (human):** `https://offshoreleaks.icij.org/`
+- **Auth:** None
+- **Note:** The previous Open Refine reconciliation endpoint at
+  `/reconcile` now returns 404. ICIJ has removed it. The bulk ZIP is the
+  remaining stable access path. The skill's `fetch_icij_offshore.py` caches
+  the ZIP locally (default `~/.cache/hermes-osint/icij/`, refreshes after
+  30 days) and searches it offline.
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_icij_offshore.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `node_id` | int | ICIJ canonical node ID |
+| `name` | str | Entity / officer / intermediary name |
+| `node_type` | str | entity / officer / intermediary / address |
+| `country_codes` | str | Semicolon-separated ISO codes |
+| `countries` | str | Country names |
+| `jurisdiction` | str | Offshore jurisdiction (BVI, Panama, etc.) |
+| `incorporation_date` | str | YYYY-MM-DD |
+| `inactivation_date` | str | YYYY-MM-DD (if struck) |
+| `source` | str | Panama Papers / Paradise Papers / Pandora Papers / etc. |
+| `entity_url` | str | Link to ICIJ page |
+| `connections` | str | Semicolon-separated node IDs of related entities |
+
+## 4. Coverage
+
+- Worldwide offshore entity records
+- Earliest records: 1970s (Bahamas Leaks). Most data 1990–2018.
+- NOT updated in real-time — new leaks added when ICIJ publishes them
+- ~810,000 offshore entities + ~750,000 officers + ~150,000 intermediaries
+
+## 5. Cross-Reference Potential
+
+- **SEC EDGAR** ↔ `name` (public companies with offshore arms)
+- **USAspending** ↔ `name` (federal contractors with offshore structure)
+- **OFAC SDN** ↔ `name` (sanctioned entities using offshore vehicles)
+
+Join key: normalized entity/officer name. `node_id` is canonical for cross-
+referencing within ICIJ. Connections graph traversal is in-script (BFS over
+`connections`).
+
+## 6. Data Quality
+
+- Offshore entity names sometimes appear in multiple leaks with slight variations
+- Officers may be nominees (front persons), not beneficial owners
+- Some entries have minimal info (just a name + jurisdiction)
+- The connections graph is incomplete — some relationships are documented in
+  source materials but not in the structured database
+- Inactive/struck-off entities are still included with `inactivation_date`
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_icij_offshore.py`
+
+```bash
+# Search by entity name (case-insensitive substring across the bulk DB)
+python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
+    --out data/icij.csv
+
+# Search by officer (individual person)
+python3 SKILL_DIR/scripts/fetch_icij_offshore.py --officer "SMITH JOHN" \
+    --out data/icij.csv
+
+# Search by jurisdiction (filter on cached results)
+python3 SKILL_DIR/scripts/fetch_icij_offshore.py --officer "SMITH" \
+    --jurisdiction "BRITISH VIRGIN ISLANDS" --out data/icij_bvi.csv
+
+# Force a fresh download (default refresh window is 30 days)
+python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
+    --force-refresh --out data/icij.csv
+```
+
+First call downloads the ~70 MB ZIP under `~/.cache/hermes-osint/icij/`
+(or `$HERMES_OSINT_CACHE/icij/`). Subsequent calls reuse the cache for 30 days.
+
+## 8. Legal & Licensing
+
+- Public record as published by ICIJ under explicit publication
+- No copyright on the underlying facts (entity names, jurisdictions)
+- ICIJ asks for attribution if used in derivative reporting
+- **Ethical note**: Presence in this database does NOT imply wrongdoing. Many
+  offshore structures are legal. The database is a research tool, not a list of
+  criminals.
+
+## 9. References
+
+- Database: https://offshoreleaks.icij.org/
+- About the data: https://offshoreleaks.icij.org/pages/about
+- Methodology: https://www.icij.org/investigations/panama-papers/
+- API hints: Open Refine reconciliation endpoint at `https://offshoreleaks.icij.org/reconcile`
diff --git a/optional-skills/research/osint-investigation/references/sources/nyc-acris.md b/optional-skills/research/osint-investigation/references/sources/nyc-acris.md
new file mode 100644
index 00000000000..4b20169bf3e
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/nyc-acris.md
@@ -0,0 +1,90 @@
+# NYC ACRIS — NYC Real Property Records
+
+## 1. Summary
+
+The Automated City Register Information System (ACRIS) is NYC's index of
+recorded property documents: deeds, mortgages, satisfactions, liens, UCC
+filings. Covers Manhattan, Bronx, Brooklyn, Queens, Staten Island.
+Published as 4 linked Socrata datasets on the NYC Open Data portal.
+
+## 2. Access Methods
+
+- **Socrata API:** `https://data.cityofnewyork.us/resource/636b-3b5g.json` (Parties)
+- **Other datasets:** `bnx9-e6tj` (Master), `8h5j-fqxa` (Legal), `uqqa-hym2` (References)
+- **Auth:** None for read access (Socrata `$app_token` raises rate limits if needed)
+- **Rate limit:** Generous (~1000 req/hour unauthenticated)
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_nyc_acris.py` (Parties joined to Master):
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `document_id` | str | ACRIS document ID |
+| `name` | str | Party name as recorded (often "LAST, FIRST" but varies) |
+| `party_type` | str | 1=grantor, 2=grantee, 3=other |
+| `party_role` | str | Human-readable role label |
+| `address_1` | str | Property or party address line 1 |
+| `city`, `state`, `zip`, `country` | str | Address parts |
+| `doc_type` | str | DEED, MTGE (mortgage), SAT (satisfaction), AGMT, etc. |
+| `doc_date`, `recorded_date` | str | YYYY-MM-DD |
+| `borough` | str | Manhattan / Bronx / Brooklyn / Queens / Staten Island |
+| `amount` | str | Document amount (USD, when applicable) |
+| `filing_url` | str | Direct ACRIS DocumentImageView link |
+
+## 4. Coverage
+
+- NYC 5 boroughs only — other counties have their own recorders
+- 1966 → present (older filings exist on microfilm at the County Clerk)
+- Updated nightly
+- ~70M+ party records cumulative
+
+## 5. Cross-Reference Potential
+
+- **SEC EDGAR** ↔ `name` (insider filers with NYC property)
+- **USAspending** ↔ `name` (federal contractors with NYC property)
+- **Senate LDA** ↔ `name` (lobbyists / clients with NYC property)
+- **ICIJ Offshore** ↔ `name` (NYC properties owned via offshore vehicles)
+
+Join key: normalized party name. NYC property records typically store names
+as "LAST, FIRST" or full LLC names — use `entity_resolution.py`.
+
+## 6. Data Quality
+
+- Same person appears with multiple name formats over time
+- LLC and trust ownership obscures beneficial owners
+- Recording lag can be 2-4 weeks after closing
+- Older documents have spottier address data
+- Sealed records (e.g. domestic violence shelters) are excluded by law
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_nyc_acris.py`
+
+```bash
+# By party name
+python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "ROLNICK" --out data/acris.csv
+
+# By address (useful when you know the property but not the names)
+python3 SKILL_DIR/scripts/fetch_nyc_acris.py --address "571 HUDSON" --out data/acris.csv
+
+# Restrict to grantees (buyers / mortgagees)
+python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "ROLNICK" --party-type 2 \
+    --out data/acris_buyers.csv
+```
+
+The script joins Parties → Master to populate doc_type, dates, borough, and
+amount. Pass `--no-enrich` to skip the join (faster, fewer columns).
+
+## 8. Legal & Licensing
+
+- Public record under NYS Real Property Law and NYC Charter
+- No commercial use restrictions on the data
+- All ACRIS data is public information by statute
+
+## 9. References
+
+- ACRIS portal: https://a836-acris.nyc.gov/CP/
+- NYC Open Data: https://data.cityofnewyork.us/
+- Parties dataset: https://data.cityofnewyork.us/City-Government/ACRIS-Real-Property-Parties/636b-3b5g
+- Document type codes: https://www1.nyc.gov/site/finance/taxes/acris.page
diff --git a/optional-skills/research/osint-investigation/references/sources/ofac-sdn.md b/optional-skills/research/osint-investigation/references/sources/ofac-sdn.md
new file mode 100644
index 00000000000..ab3602031f1
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/ofac-sdn.md
@@ -0,0 +1,92 @@
+# OFAC SDN — Specially Designated Nationals List
+
+## 1. Summary
+
+The Office of Foreign Assets Control (OFAC) publishes the Specially Designated
+Nationals and Blocked Persons List (SDN). US persons are generally prohibited
+from dealing with individuals and entities on this list. Also published:
+non-SDN consolidated lists (BIS Denied Persons, FSE, etc.).
+
+## 2. Access Methods
+
+- **Full XML:** `https://www.treasury.gov/ofac/downloads/sdn.xml`
+- **Delimited:** `https://www.treasury.gov/ofac/downloads/sdn.csv`
+- **Consolidated:** `https://www.treasury.gov/ofac/downloads/consolidated/consolidated.xml`
+- **Auth:** None
+- **Rate limit:** None (static file downloads). Updated continuously.
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_ofac_sdn.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `entity_id` | int | OFAC unique ID |
+| `name` | str | Primary name |
+| `entity_type` | str | individual / entity / vessel / aircraft |
+| `program_list` | str | Semicolon-separated sanctions programs (e.g. SDGT;IRAN) |
+| `title` | str | For individuals: title/role |
+| `nationalities` | str | Semicolon-separated country codes |
+| `aka_list` | str | Semicolon-separated "also known as" names |
+| `addresses` | str | Semicolon-separated known addresses |
+| `dob` | str | Date of birth (individuals) |
+| `pob` | str | Place of birth (individuals) |
+| `remarks` | str | OFAC's free-text remarks |
+| `last_updated` | str | YYYY-MM-DD (publication date) |
+
+## 4. Coverage
+
+- Worldwide — all entities sanctioned by US Treasury
+- ~10,000 entries on SDN, ~15,000 on consolidated lists
+- Updated continuously (sometimes daily during active enforcement)
+- Includes AKAs (very common, can be 10+ per entity)
+
+## 5. Cross-Reference Potential
+
+- **SEC EDGAR** ↔ `name` (public companies sanctioned)
+- **USAspending** ↔ `name` (sanctioned entity as federal contractor — should
+  be impossible but verify)
+- **ICIJ Offshore** ↔ `name` (offshore entities also sanctioned)
+
+Join key: normalized name. **CRITICAL**: must match against `aka_list` too.
+Many sanctioned entities are caught only via aliases.
+
+## 6. Data Quality
+
+- Names are transliterated from many scripts — multiple romanizations possible
+- AKAs often differ wildly from primary name
+- Some entries have minimal info (no DOB, no address) for individuals
+- Free-text `remarks` contain critical context — read them
+- "Specially Designated Global Terrorists" (SDGT) and "Cyber-related" (CYBER2)
+  programs add and remove entries frequently
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_ofac_sdn.py`
+
+```bash
+# Full snapshot
+python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --out data/ofac_sdn.csv
+
+# Filter to specific program
+python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --program SDGT --out data/sdn_sdgt.csv
+
+# Entities only (skip individuals, vessels, aircraft)
+python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --entity-type entity --out data/sdn_entities.csv
+```
+
+## 8. Legal & Licensing
+
+- Public record under Executive Order authority and statutory sanctions programs
+- US persons MUST screen against this list — it is enforced
+- No restrictions on the data itself; restrictions are on transactions with
+  the listed entities
+- ZERO penalty for "over-matching" — false positives must be cleared but are not
+  prohibited
+
+## 9. References
+
+- OFAC home: https://ofac.treasury.gov/
+- SDN list: https://ofac.treasury.gov/specially-designated-nationals-and-blocked-persons-list-sdn-human-readable-lists
+- Data formats: https://ofac.treasury.gov/sdn-list/sanctions-list-search-tool
+- Compliance guidance: https://ofac.treasury.gov/recent-actions
diff --git a/optional-skills/research/osint-investigation/references/sources/opencorporates.md b/optional-skills/research/osint-investigation/references/sources/opencorporates.md
new file mode 100644
index 00000000000..0bd190a2f49
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/opencorporates.md
@@ -0,0 +1,103 @@
+# OpenCorporates — Global Corporate Registry
+
+## 1. Summary
+
+OpenCorporates aggregates corporate registry data from 130+ jurisdictions
+worldwide (~200M companies). Covers US state-level filings (NY DOS, Delaware
+DOC, California SOS, etc.), UK Companies House, EU registries, and most
+common-law jurisdictions.
+
+## 2. Access Methods
+
+- **REST API:** `https://api.opencorporates.com/v0.4/`
+- **HTML fallback:** `https://opencorporates.com/companies?q=...`
+- **Auth:** API token required (free tier 500 calls/month, paid plans available)
+- **Rate limit:** Token-bound; un-tokened requests return 401
+
+Set `OPENCORPORATES_API_TOKEN` env var. Get a free token at
+https://opencorporates.com/api_accounts/new.
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_opencorporates.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `name` | str | Company legal name |
+| `company_number` | str | Registry-assigned number |
+| `jurisdiction_code` | str | e.g. `us_ny`, `us_de`, `gb` |
+| `jurisdiction_name` | str | Human-readable jurisdiction |
+| `incorporation_date` | str | YYYY-MM-DD |
+| `dissolution_date` | str | YYYY-MM-DD (empty if active) |
+| `company_type` | str | Domestic LLC / Foreign Corp / etc. |
+| `status` | str | Active / Inactive / Dissolved |
+| `registered_address` | str | Registered office address |
+| `opencorporates_url` | str | Link to OpenCorporates entity page |
+| `officers_count` | str | Total officers on record |
+| `source` | str | `api`, `html`, or `html-fallback` |
+
+## 4. Coverage
+
+- US: all 50 states + DC at state level (LLCs, corps, LPs)
+- International: UK, EU, Canada, Australia, NZ, many APAC + LATAM jurisdictions
+- ~200M company records cumulative
+- Update frequency varies by jurisdiction (UK CH is near-realtime; some
+  state registries lag months)
+
+## 5. Cross-Reference Potential
+
+- **NYC ACRIS** ↔ `name` (LLC/corp owners of NYC property)
+- **USAspending** ↔ `name` (corporate federal contractors)
+- **SEC EDGAR** ↔ `name` (public companies + their subsidiaries)
+- **ICIJ Offshore** ↔ `name` (international corporate structures)
+
+Join key: normalized company name. Some entries have `previous_names` arrays
+which are not currently exported by the fetch script — query OC directly
+for that.
+
+## 6. Data Quality
+
+- Company-name spellings vary across re-incorporations and renames
+- Officer records are spottier than company records (many jurisdictions
+  don't require officer disclosure)
+- Beneficial-ownership data is generally NOT here — most jurisdictions
+  don't require it. UK Companies House has PSC (people with significant
+  control) but that's not universal.
+- Cross-jurisdictional links (parent / subsidiary) are based on registry
+  filings only; corporate trees are often incomplete
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_opencorporates.py`
+
+```bash
+# Search globally by name
+python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
+    --out data/oc.csv
+
+# Restrict to a jurisdiction
+python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
+    --jurisdiction us_ny --out data/oc_ny.csv
+
+# Set token via env or flag
+OPENCORPORATES_API_TOKEN=xxx python3 SKILL_DIR/scripts/fetch_opencorporates.py \
+    --query "Microsoft" --out data/oc.csv
+```
+
+Without a token the script falls back to scraping the HTML search page.
+The fallback is brittle and only fills in `name`, `jurisdiction_code`,
+`opencorporates_url` — set the token for serious work.
+
+## 8. Legal & Licensing
+
+- OpenCorporates aggregates public records — the underlying facts are
+  public domain
+- OpenCorporates own database is licensed CC-BY-SA-4.0; attribution required
+- API ToS prohibits redistributing the full dataset; per-record reference
+  is fine
+
+## 9. References
+
+- API docs: https://api.opencorporates.com/documentation/API-Reference
+- Jurisdiction codes: https://api.opencorporates.com/v0.4/jurisdictions.json
+- Schema: https://opencorporates.com/info/our_data
diff --git a/optional-skills/research/osint-investigation/references/sources/sec-edgar.md b/optional-skills/research/osint-investigation/references/sources/sec-edgar.md
new file mode 100644
index 00000000000..55a33d70258
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/sec-edgar.md
@@ -0,0 +1,83 @@
+# SEC EDGAR — Corporate Filings
+
+## 1. Summary
+
+EDGAR (Electronic Data Gathering, Analysis, and Retrieval) is the SEC's system
+for corporate disclosure filings: 10-K (annual), 10-Q (quarterly), 8-K (current
+events), DEF 14A (proxy), Form 4 (insider trading), 13F (institutional holdings).
+
+## 2. Access Methods
+
+- **API:** `https://data.sec.gov/submissions/CIK<10-digit-padded>.json` (no auth)
+- **Filing index:** `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=...`
+- **Full-text search:** `https://efts.sec.gov/LATEST/search-index?q=...`
+- **Auth:** None — requires `User-Agent` header with contact info per SEC policy
+- **Rate limit:** 10 requests/second per IP (enforced)
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_sec_edgar.py` (filings index):
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `cik` | str | Central Index Key (10-digit padded) |
+| `company_name` | str | Registrant name |
+| `form_type` | str | 10-K, 10-Q, 8-K, etc. |
+| `filing_date` | str | YYYY-MM-DD |
+| `accession_number` | str | Filing accession (e.g. 0000320193-24-000123) |
+| `primary_document` | str | Filename of main document |
+| `filing_url` | str | Direct URL to filing index |
+| `reporting_period` | str | Period of report (where applicable) |
+
+## 4. Coverage
+
+- All public US registrants from 1993 → present
+- 1993-2000 has spotty coverage of older filings (paper-to-electronic migration)
+- ~12M filings cumulative
+- Updated within minutes of filing acceptance
+
+## 5. Cross-Reference Potential
+
+- **USAspending** ↔ `company_name` (public companies as federal contractors)
+- **Senate LD** ↔ `company_name` (public companies hire lobbyists)
+- **OFAC SDN** ↔ `company_name` (sanctions screening of public registrants)
+
+Join key: company name OR CIK if you have it. CIK is canonical and stable.
+
+## 6. Data Quality
+
+- Subsidiaries often filed under parent CIK — be careful with name matches
+- Name changes over time (rebrands, acquisitions) — CIK remains constant
+- 10-K Item 1A Risk Factors are free-form text — useful for `web_extract`-style
+  parsing, not structured queries
+- Foreign private issuers file 20-F instead of 10-K
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_sec_edgar.py`
+
+```bash
+# By CIK
+python3 SKILL_DIR/scripts/fetch_sec_edgar.py --cik 0000320193 \
+    --types 10-K,10-Q --out data/edgar_filings.csv
+
+# By company name (resolves to CIK first via name search)
+python3 SKILL_DIR/scripts/fetch_sec_edgar.py --company "APPLE INC" \
+    --types 8-K --since 2024-01-01 --out data/edgar_filings.csv
+```
+
+Set `SEC_USER_AGENT` env var with your contact email (SEC requirement).
+Example: `SEC_USER_AGENT="Research example@example.com"`.
+
+## 8. Legal & Licensing
+
+- Public record under SEC Rule 24b-2 / 17 CFR § 230.401
+- No commercial use restrictions on filing content
+- SEC asks all bulk users to include a `User-Agent` with contact info and to
+  respect 10 req/s — failure to do so can result in IP blocking
+
+## 9. References
+
+- Developer docs: https://www.sec.gov/edgar/sec-api-documentation
+- EDGAR full-text search: https://efts.sec.gov/LATEST/search-index
+- Fair access policy: https://www.sec.gov/os/accessing-edgar-data
diff --git a/optional-skills/research/osint-investigation/references/sources/senate-ld.md b/optional-skills/research/osint-investigation/references/sources/senate-ld.md
new file mode 100644
index 00000000000..5142dc6ea41
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/senate-ld.md
@@ -0,0 +1,89 @@
+# Senate LD — Lobbying Disclosure (LD-1 / LD-2)
+
+## 1. Summary
+
+The Senate Office of Public Records publishes lobbying disclosures under the
+Lobbying Disclosure Act of 1995 (LDA, as amended by HLOGA 2007). LD-1 is
+registration of a new client-lobbyist relationship; LD-2 is the quarterly
+activity report.
+
+## 2. Access Methods
+
+- **API:** `https://lda.senate.gov/api/v1/` (no auth required for read-only)
+- **Bulk download:** `https://lda.senate.gov/api/v1/filings/?format=csv` (paginated)
+- **Auth:** Token required for >120 req/hour — register at https://lda.senate.gov/api/auth/register/
+- **Rate limit:** 120 req/hour unauthenticated, 1,200 req/hour authenticated
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_senate_ld.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `filing_uuid` | str | Unique filing ID |
+| `filing_type` | str | LD-1, LD-2, LD-203, etc. |
+| `filing_year` | int | Year |
+| `filing_period` | str | Q1/Q2/Q3/Q4 or annual |
+| `registrant_name` | str | Lobbying firm or organization |
+| `registrant_id` | str | Senate-assigned registrant ID |
+| `client_name` | str | Client being represented |
+| `client_id` | str | Senate-assigned client ID |
+| `client_general_description` | str | Client industry / business |
+| `income` | float | LD-2 income from client this quarter (USD) |
+| `expenses` | float | LD-2 expenses (in-house lobbying) |
+| `lobbyists` | str | Semicolon-separated lobbyist names |
+| `issues` | str | Semicolon-separated issue areas |
+| `government_entities` | str | Agencies/chambers contacted |
+| `filing_date` | str | YYYY-MM-DD |
+
+## 4. Coverage
+
+- US federal lobbying only (state lobbying handled by individual state ethics offices)
+- 1999 → present (full electronic coverage from 2008)
+- Quarterly reporting cycle (LD-2)
+- ~1M+ filings cumulative
+
+## 5. Cross-Reference Potential
+
+- **USAspending** ↔ `client_name` (clients lobbying for contracts)
+- **SEC EDGAR** ↔ `client_name` (public companies as lobbying clients)
+- **OFAC SDN** ↔ `client_name` (sanctions screening of lobbying clients)
+
+Join key: normalized client_name. registrant_id and client_id are canonical
+when joining Senate-internal records.
+
+## 6. Data Quality
+
+- Many lobbyist names appear in multiple registrants over time (job changes)
+- `issues` and `government_entities` are free-text — Inconsistent capitalization
+- Foreign agents register under FARA (Department of Justice), NOT here
+- Income/expenses are reported in $10,000 brackets in some older filings
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_senate_ld.py`
+
+```bash
+# By client
+python3 SKILL_DIR/scripts/fetch_senate_ld.py --client "EXAMPLE CORP" \
+    --year 2024 --out data/lobbying.csv
+
+# By registrant (lobbying firm)
+python3 SKILL_DIR/scripts/fetch_senate_ld.py --registrant "BIG K STREET LLP" \
+    --year 2024 --out data/lobbying.csv
+```
+
+Set `SENATE_LDA_TOKEN` env var if you have one (or pass `--token`).
+Defaults to anonymous (120 req/hour).
+
+## 8. Legal & Licensing
+
+- Public record under 2 U.S.C. § 1604 (LDA)
+- No commercial use restrictions
+- Reuse is unconditional — see Senate Public Records Office disclaimer
+
+## 9. References
+
+- API docs: https://lda.senate.gov/api/redoc/v1/
+- LDA guidance: https://lobbyingdisclosure.house.gov/ld_guidance.pdf
+- Senate Public Records: https://lda.senate.gov/
diff --git a/optional-skills/research/osint-investigation/references/sources/usaspending.md b/optional-skills/research/osint-investigation/references/sources/usaspending.md
new file mode 100644
index 00000000000..6477272293b
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/usaspending.md
@@ -0,0 +1,97 @@
+# USAspending — Federal Government Contracts and Grants
+
+## 1. Summary
+
+USAspending.gov is the official source of federal spending data. Coverage:
+contracts, grants, loans, direct payments, sub-awards. Required by the DATA Act
+of 2014 — all federal agencies must report to a single schema.
+
+## 2. Access Methods
+
+- **API v2:** `https://api.usaspending.gov/api/v2/` (no auth, no key)
+- **Bulk:** `https://files.usaspending.gov/` (CSV / Parquet by award type)
+- **Auth:** None
+- **Rate limit:** Not strictly enforced, but be polite — keep to <10 req/s
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_usaspending.py` (prime awards):
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `award_id` | str | Federal award ID (PIID for contracts, FAIN for grants) |
+| `recipient_name` | str | Awardee legal name |
+| `recipient_uei` | str | Unique Entity Identifier (replaced DUNS in 2022) |
+| `recipient_duns` | str | Legacy DUNS number (historical only) |
+| `recipient_parent_name` | str | Ultimate parent organization |
+| `recipient_state` | str | Recipient state |
+| `awarding_agency` | str | Department / agency name |
+| `awarding_sub_agency` | str | Sub-tier (e.g. DoD → Army) |
+| `award_type` | str | Contract / Grant / Loan / Direct Payment |
+| `award_amount` | float | Current total obligation in USD |
+| `award_date` | str | Action / signed date YYYY-MM-DD |
+| `period_of_performance_start` | str | YYYY-MM-DD |
+| `period_of_performance_end` | str | YYYY-MM-DD |
+| `naics_code` | str | Industry classification |
+| `psc_code` | str | Product / Service Code |
+| `competition_extent` | str | Full / limited / sole-source |
+| `description` | str | Award description (free-text) |
+
+## 4. Coverage
+
+- US federal awards only (state/local not included)
+- FY 2008 → present (full coverage from FY 2017)
+- Updated bi-weekly from agency reporting
+- ~100M+ transaction records cumulative
+
+## 5. Cross-Reference Potential
+
+- **SEC EDGAR** ↔ `recipient_name` (public companies as contractors)
+- **Senate LD** ↔ `recipient_name` (lobbying clients winning contracts)
+- **OFAC SDN** ↔ `recipient_name` (sanctions screening of contractors — must be
+  filtered out by SAM.gov but verify)
+- **ICIJ Offshore** ↔ `recipient_name` (offshore-linked contractors)
+
+Join key: normalized recipient name. UEI is canonical when present.
+
+## 6. Data Quality
+
+- DUNS → UEI transition (April 2022) — old records have DUNS, new records have UEI
+- Some sub-awards aren't reported (FFATA threshold is $30k)
+- Award amount changes over time (mod actions) — fetch script reports current total
+- `competition_extent` field is free-text in older records — `fetch_usaspending.py`
+  normalizes to canonical values
+- Recipient name variations are extensive — "ACME LLC", "Acme L.L.C.", "ACME, INC"
+  all appear. Use `entity_resolution.py`.
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_usaspending.py`
+
+```bash
+# By recipient name
+python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
+    --fy 2024 --out data/contracts.csv
+
+# By awarding agency
+python3 SKILL_DIR/scripts/fetch_usaspending.py --agency "Department of Defense" \
+    --fy 2024 --out data/contracts.csv
+
+# Filter to sole-source only
+python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
+    --fy 2024 --sole-source-only --out data/contracts.csv
+```
+
+## 8. Legal & Licensing
+
+- Public record under the Federal Funding Accountability and Transparency Act
+  (FFATA, 2006) and DATA Act (2014)
+- No commercial use restrictions on the data
+- Personal information of award recipients (e.g. small business owners' addresses
+  in some grants) should be handled per the source agency's privacy notice
+
+## 9. References
+
+- API docs: https://api.usaspending.gov/
+- Data dictionary: https://www.usaspending.gov/data-dictionary
+- Award schema: https://files.usaspending.gov/docs/Data_Dictionary_Crosswalk.xlsx
diff --git a/optional-skills/research/osint-investigation/references/sources/wayback.md b/optional-skills/research/osint-investigation/references/sources/wayback.md
new file mode 100644
index 00000000000..f397c093a23
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/wayback.md
@@ -0,0 +1,93 @@
+# Wayback Machine — Internet Archive CDX
+
+## 1. Summary
+
+The Internet Archive's Wayback Machine has captured ~900B+ web pages since
+1996. The CDX server API indexes those captures by URL, timestamp, and
+content hash. Free, anonymous, no auth.
+
+## 2. Access Methods
+
+- **CDX server:** `https://web.archive.org/cdx/search/cdx`
+- **Wayback URL:** `https://web.archive.org/web/<timestamp>/<url>`
+- **Save Page Now (write):** `https://web.archive.org/save/<url>` (different API)
+- **Auth:** None
+- **Rate limit:** Generous; be polite (~1 req/s)
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_wayback.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `url` | str | Original URL captured |
+| `timestamp` | str | YYYYMMDDHHMMSS (CDX format) |
+| `wayback_url` | str | Direct replay URL |
+| `mimetype` | str | Content-type at capture |
+| `status` | str | HTTP status (typically 200) |
+| `digest` | str | SHA1 of capture content (collapse-friendly) |
+| `length` | str | Byte length of capture |
+
+## 4. Coverage
+
+- 1996 → present
+- ~900B+ captures across ~700M domains
+- Updated continuously by automated crawls + manual saves
+- Some domains have aggressive coverage (news), others sparse (private)
+
+## 5. Cross-Reference Potential
+
+- **Wikipedia** ↔ Reverse-lookup pages cited as references that have since
+  disappeared
+- **News URLs** ↔ Original article content when present-day URLs 404
+- **Corporate websites** ↔ Historical "About" pages, executive bios that
+  have been scrubbed
+
+The Wayback CDX is most useful as a **content-recovery** layer when other
+sources point to URLs that no longer exist.
+
+## 6. Data Quality
+
+- robots.txt-blocked domains may have spotty or no coverage
+- Captures vary in completeness (HTML may be saved without CSS/JS)
+- Some content is excluded by domain owner request (DMCA, etc.)
+- Coverage of "deep links" (URLs with query strings) is uneven
+- Time resolution is per-capture, not continuous — gaps are common
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_wayback.py`
+
+```bash
+# All captures of a specific URL
+python3 SKILL_DIR/scripts/fetch_wayback.py --url "https://example.com/page" \
+    --out data/wb.csv
+
+# All captures of a host
+python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
+    --match host --out data/wb.csv
+
+# All captures of a domain + subdomains
+python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
+    --match domain --out data/wb.csv
+
+# Only unique-content captures within a date window
+python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
+    --match host --collapse digest \
+    --from-date 2020-01-01 --to-date 2023-12-31 \
+    --out data/wb.csv
+```
+
+## 8. Legal & Licensing
+
+- Internet Archive captures are made under fair-use research provisions
+- Replay URLs are stable references — citing them is encouraged
+- Internet Archive non-profit terms of use govern content
+- Some content is rights-restricted; replay may be blocked even if the
+  CDX entry shows it as captured
+
+## 9. References
+
+- CDX server docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
+- Wayback API: https://archive.org/help/wayback_api.php
+- Internet Archive: https://archive.org/
diff --git a/optional-skills/research/osint-investigation/references/sources/wikipedia.md b/optional-skills/research/osint-investigation/references/sources/wikipedia.md
new file mode 100644
index 00000000000..1a004bf2e8d
--- /dev/null
+++ b/optional-skills/research/osint-investigation/references/sources/wikipedia.md
@@ -0,0 +1,107 @@
+# Wikipedia + Wikidata
+
+## 1. Summary
+
+Wikipedia is the canonical narrative-bio source for notable people, places,
+and organizations. Wikidata is its structured-data counterpart: ~110M
+items, each with claims, dates, identifiers, and cross-references to
+external authorities (VIAF, ISNI, ORCID, GRID, etc.).
+
+Together they're a high-precision entity-resolution layer — the bar for
+inclusion is real, but anything past that bar is well-cross-referenced.
+
+## 2. Access Methods
+
+- **Wikipedia OpenSearch:** `https://en.wikipedia.org/w/api.php?action=opensearch`
+- **Wikipedia REST summary:** `https://en.wikipedia.org/api/rest_v1/page/summary/<title>`
+- **Wikidata Action API:** `https://www.wikidata.org/w/api.php?action=wbgetentities`
+- **Wikidata SPARQL:** `https://query.wikidata.org/sparql` (more powerful but aggressively rate-limited)
+- **Auth:** None, but **a meaningful User-Agent is required**
+
+Set `HERMES_OSINT_UA` to something identifying (e.g. `your-app/1.0 (you@example.com)`).
+Wikimedia returns HTTP 429 to generic UAs.
+
+## 3. Data Schema
+
+Key fields emitted by `fetch_wikipedia.py`:
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `source` | str | `wikipedia` or `wikipedia+wikidata` |
+| `label` | str | Wikipedia article title |
+| `description` | str | Short Wikidata description |
+| `qid` | str | Wikidata QID (e.g. Q2283 for Microsoft) |
+| `wikipedia_title`, `wikipedia_url` | str | Article identifier + URL |
+| `wikidata_url` | str | Wikidata entity URL |
+| `instance_of` | str | What kind of thing it is (P31) |
+| `country` | str | Country (P17 for orgs/places, P27 for people) |
+| `occupation` | str | P106 |
+| `employer` | str | P108 |
+| `date_of_birth` | str | P569, YYYY-MM-DD |
+| `place_of_birth` | str | P19 |
+| `summary` | str | Wikipedia REST extract (~1000 chars) |
+
+The fetch script uses Wikidata's Action API (NOT SPARQL) for structured
+facts — far more lenient on rate limits.
+
+## 4. Coverage
+
+- Wikipedia EN: ~7M articles
+- Wikidata: ~110M items, ~1.5B statements
+- Updated continuously; abuse filters and bots run constantly
+- High notability bar — most private individuals are not in Wikipedia
+
+## 5. Cross-Reference Potential
+
+- **All sources** ↔ `label` (entity identity resolution)
+- **SEC EDGAR** ↔ `label` (public companies)
+- **CourtListener** ↔ `label` (parties to notable litigation)
+- **Wikidata external identifiers** (not currently in this fetcher's output)
+  link to VIAF, ISNI, ORCID, GRID, GitHub, Twitter, IMDb, ...
+
+Join key: Wikidata QID is canonical. Wikipedia titles are stable for
+most articles but can be renamed.
+
+## 6. Data Quality
+
+- Notability filter — only notable entities (criteria vary by topic)
+- Recency lag — current events take days to weeks to be reflected
+- POV / vandalism — moderated, but edits between sweeps can be bad
+- Living-persons biographies have stricter sourcing requirements
+- Wikidata claims have qualifiers and references — the fetch script
+  doesn't currently export them
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_wikipedia.py`
+
+```bash
+# Look up a notable entity
+python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Microsoft" --out data/wp.csv
+
+# A specific person
+python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Bill Gates" --out data/wp_bg.csv
+
+# Skip the Wikidata enrichment for speed
+python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Microsoft" --no-wikidata \
+    --limit 5 --out data/wp.csv
+```
+
+The OpenSearch is fuzzy — `--limit 5` returns the top 5 Wikipedia article
+matches. Each is enriched with the QID + structured facts unless
+`--no-wikidata` is passed.
+
+## 8. Legal & Licensing
+
+- Wikipedia text: CC-BY-SA-3.0 / GFDL
+- Wikidata claims: CC0 (public domain)
+- API ToS: respect rate limits, identify your agent
+- Commercial use allowed with attribution
+
+## 9. References
+
+- Wikipedia OpenSearch: https://www.mediawiki.org/wiki/API:Opensearch
+- Wikipedia REST: https://en.wikipedia.org/api/rest_v1/
+- Wikidata Action API: https://www.wikidata.org/wiki/Wikidata:Data_access
+- Wikidata SPARQL: https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service
+- User-Agent policy: https://meta.wikimedia.org/wiki/User-Agent_policy
diff --git a/optional-skills/research/osint-investigation/scripts/_http.py b/optional-skills/research/osint-investigation/scripts/_http.py
new file mode 100644
index 00000000000..5da62310b9f
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/_http.py
@@ -0,0 +1,82 @@
+"""Tiny stdlib HTTP helper used by fetch_*.py scripts.
+
+Provides polite retry + JSON convenience + User-Agent enforcement.
+"""
+from __future__ import annotations
+
+import json
+import os
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+
+DEFAULT_UA = (
+    "hermes-osint-investigation/0.2 "
+    "(+https://github.com/NousResearch/hermes-agent; "
+    "set HERMES_OSINT_UA env var to identify yourself per "
+    "Wikimedia / SEC fair-use guidance)"
+)
+
+
+def get(
+    url: str,
+    *,
+    params: dict | None = None,
+    headers: dict | None = None,
+    user_agent: str | None = None,
+    max_retries: int = 3,
+    backoff: float = 1.5,
+    timeout: float = 30.0,
+) -> bytes:
+    """GET with retry on 5xx and Retry-After honoring.
+
+    429 (rate-limit) is raised IMMEDIATELY with a clear message — retrying
+    when the upstream says "you're over quota" just wastes time. The caller
+    should slow down or supply real credentials.
+    """
+    if params:
+        sep = "&" if "?" in url else "?"
+        url = f"{url}{sep}{urllib.parse.urlencode(params)}"
+    h = {"User-Agent": user_agent or os.environ.get("HERMES_OSINT_UA", DEFAULT_UA)}
+    if headers:
+        h.update(headers)
+
+    last_err: Exception | None = None
+    for attempt in range(max_retries + 1):
+        req = urllib.request.Request(url, headers=h)
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as resp:
+                return resp.read()
+        except urllib.error.HTTPError as e:
+            if e.code == 429:
+                # Surface immediately. Read the body so the caller sees the
+                # provider's actual message ("OVER_RATE_LIMIT" etc.).
+                try:
+                    body = e.read(2048).decode("utf-8", errors="replace")
+                except Exception:  # noqa: BLE001
+                    body = ""
+                raise RuntimeError(
+                    f"HTTP 429 rate-limited by {urllib.parse.urlsplit(url).netloc}. "
+                    f"Slow down or supply a real API key. Body: {body[:300]}"
+                ) from e
+            if e.code in (500, 502, 503, 504) and attempt < max_retries:
+                retry_after = e.headers.get("Retry-After") if e.headers else None
+                wait = float(retry_after) if (retry_after and retry_after.isdigit()) else backoff ** (attempt + 1)
+                time.sleep(wait)
+                last_err = e
+                continue
+            raise
+        except urllib.error.URLError as e:
+            if attempt < max_retries:
+                time.sleep(backoff ** (attempt + 1))
+                last_err = e
+                continue
+            raise
+    if last_err:
+        raise last_err
+    raise RuntimeError("unreachable")
+
+
+def get_json(url: str, **kwargs) -> dict | list:
+    return json.loads(get(url, **kwargs).decode("utf-8"))
diff --git a/optional-skills/research/osint-investigation/scripts/_normalize.py b/optional-skills/research/osint-investigation/scripts/_normalize.py
new file mode 100644
index 00000000000..3c9a197af8b
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/_normalize.py
@@ -0,0 +1,67 @@
+"""Shared entity-name normalization helpers (stdlib-only).
+
+Used by entity_resolution.py and timing_analysis.py.
+"""
+from __future__ import annotations
+
+import re
+
+# Legal suffixes / corporate boilerplate to strip during normalization.
+_SUFFIX_TOKENS = {
+    "INC", "INCORPORATED", "LLC", "LLP", "LP", "LTD", "LIMITED",
+    "CORP", "CORPORATION", "CO", "COMPANY",
+    "GROUP", "GRP", "HOLDINGS", "HOLDING",
+    "PARTNERS", "ASSOCIATES",
+    "INTERNATIONAL", "INTL",
+    "ENTERPRISES", "ENTERPRISE",
+    "SERVICES", "SERVICE", "SVCS",
+    "SOLUTIONS", "MANAGEMENT", "MGMT", "CONSULTING",
+    "TECHNOLOGY", "TECHNOLOGIES", "TECH",
+    "INDUSTRIES", "INDUSTRY",
+    "AMERICA", "AMERICAN",
+    "USA", "US",
+    "PLLC", "PC",
+    "TRUST", "FOUNDATION",
+}
+
+_PUNCT_RE = re.compile(r"[^\w\s]")
+_WS_RE = re.compile(r"\s+")
+
+
+def normalize_name(name: str | None) -> str:
+    """Standard normalization: uppercase, strip suffixes, drop punctuation."""
+    if not name:
+        return ""
+    s = _PUNCT_RE.sub(" ", name.upper())
+    s = _WS_RE.sub(" ", s).strip()
+    tokens = [t for t in s.split() if t and t not in _SUFFIX_TOKENS]
+    return " ".join(tokens)
+
+
+def normalize_aggressive(name: str | None) -> str:
+    """Aggressive normalization: sorted unique tokens (word-bag)."""
+    base = normalize_name(name)
+    if not base:
+        return ""
+    return " ".join(sorted(set(base.split())))
+
+
+def name_tokens(name: str | None, min_len: int = 4) -> set[str]:
+    """Token set used for overlap matching."""
+    base = normalize_name(name)
+    if not base:
+        return set()
+    return {t for t in base.split() if len(t) >= min_len}
+
+
+def token_overlap_ratio(left: str | None, right: str | None) -> tuple[float, int]:
+    """Return (jaccard-like ratio, shared token count) over min-len tokens."""
+    a = name_tokens(left)
+    b = name_tokens(right)
+    if not a or not b:
+        return 0.0, 0
+    shared = a & b
+    if not shared:
+        return 0.0, 0
+    union = a | b
+    return len(shared) / len(union), len(shared)
diff --git a/optional-skills/research/osint-investigation/scripts/build_findings.py b/optional-skills/research/osint-investigation/scripts/build_findings.py
new file mode 100644
index 00000000000..15021eb0878
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/build_findings.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+"""Build a structured findings.json with evidence chains (stdlib-only).
+
+Aggregates cross_links.csv (entity_resolution output) and an optional
+timing.json (timing_analysis output) into a single evidence-chain document.
+
+Output structure:
+    {
+      "metadata": {...},
+      "findings": [
+        {
+          "id": "F0001",
+          "title": "...",
+          "severity": "HIGH|MEDIUM|LOW",
+          "confidence": "high|medium|low",
+          "summary": "...",
+          "evidence": [
+            {"source": "cross_links.csv", "row": 12, "fields": {...}},
+            ...
+          ],
+          "sources": ["cross_links.csv", "timing.json"]
+        }
+      ]
+    }
+
+Every finding traces to specific source rows. No naked claims.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+from collections import defaultdict
+from pathlib import Path
+
+CONFIDENCE_ORDER = {"high": 0, "medium": 1, "low": 2}
+SEVERITY_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
+
+
+def _read_cross_links(path: str) -> list[dict[str, str]]:
+    with open(path, newline="", encoding="utf-8") as fh:
+        return list(csv.DictReader(fh))
+
+
+def build_findings(
+    cross_links_path: str,
+    timing_path: str | None = None,
+    out_path: str = "findings.json",
+    bundled_threshold: int = 3,
+) -> dict:
+    findings: list[dict] = []
+    next_id = 1
+
+    # 1. Match-based findings, grouped by (left_normalized, right_normalized).
+    matches = _read_cross_links(cross_links_path)
+    grouped: dict[tuple[str, str], list[dict[str, str]]] = defaultdict(list)
+    for i, row in enumerate(matches):
+        row["__row__"] = str(i)
+        grouped[(row.get("left_normalized", ""), row.get("right_normalized", ""))].append(row)
+
+    for (left_norm, right_norm), rows in grouped.items():
+        if not left_norm or not right_norm:
+            continue
+        # Use the highest-confidence match for the finding's overall confidence.
+        best = min(rows, key=lambda r: CONFIDENCE_ORDER.get(r.get("confidence", "low"), 2))
+        finding_id = f"F{next_id:04d}"
+        next_id += 1
+        evidence = [
+            {
+                "source": "cross_links.csv",
+                "row": int(r["__row__"]),
+                "fields": {
+                    "match_type": r.get("match_type", ""),
+                    "confidence": r.get("confidence", ""),
+                    "left_name": r.get("left_name", ""),
+                    "right_name": r.get("right_name", ""),
+                    "overlap_ratio": r.get("overlap_ratio", ""),
+                    "shared_tokens": r.get("shared_tokens", ""),
+                },
+            }
+            for r in rows
+        ]
+        findings.append(
+            {
+                "id": finding_id,
+                "title": f"Entity match: {best.get('left_name', '')} ↔ {best.get('right_name', '')}",
+                "severity": "MEDIUM" if best.get("confidence") == "high" else "LOW",
+                "confidence": best.get("confidence", "low"),
+                "summary": (
+                    f"{len(rows)} cross-link record(s) tie "
+                    f"'{best.get('left_name', '')}' to "
+                    f"'{best.get('right_name', '')}' "
+                    f"(best tier: {best.get('match_type', '')})."
+                ),
+                "evidence": evidence,
+                "sources": ["cross_links.csv"],
+            }
+        )
+
+    # 2. Bundled-donations findings (if cross_links carries donor↔candidate pattern).
+    #    Heuristic: many distinct left names sharing the same right name.
+    by_right: dict[str, set[str]] = defaultdict(set)
+    by_right_rows: dict[str, list[dict[str, str]]] = defaultdict(list)
+    for r in matches:
+        right = r.get("right_normalized", "")
+        left_raw = r.get("left_name", "").strip()
+        if right and left_raw:
+            by_right[right].add(left_raw)
+            by_right_rows[right].append(r)
+    for right_norm, lefts in by_right.items():
+        if len(lefts) < bundled_threshold:
+            continue
+        rows = by_right_rows[right_norm]
+        right_raw = rows[0].get("right_name", "")
+        findings.append(
+            {
+                "id": f"F{next_id:04d}",
+                "title": f"Bundled cross-links: {len(lefts)} distinct left entities ↔ '{right_raw}'",
+                "severity": "HIGH",
+                "confidence": "medium",
+                "summary": (
+                    f"{len(lefts)} distinct left-side entities link to "
+                    f"'{right_raw}'. Pattern suggests coordinated relationship "
+                    f"(e.g. bundled donations, multi-vendor employer)."
+                ),
+                "evidence": [
+                    {
+                        "source": "cross_links.csv",
+                        "row": int(r.get("__row__", "0")),
+                        "fields": {
+                            "left_name": r.get("left_name", ""),
+                            "match_type": r.get("match_type", ""),
+                        },
+                    }
+                    for r in rows
+                ],
+                "sources": ["cross_links.csv"],
+            }
+        )
+        next_id += 1
+
+    # 3. Timing-based findings.
+    if timing_path and Path(timing_path).exists():
+        timing = json.loads(Path(timing_path).read_text())
+        for r in timing.get("results", []):
+            if not r.get("significant"):
+                continue
+            findings.append(
+                {
+                    "id": f"F{next_id:04d}",
+                    "title": (
+                        f"Donation timing significantly clusters near awards: "
+                        f"{r['donor']} ↔ {r['recipient']}"
+                    ),
+                    "severity": "HIGH" if r["p_value"] < 0.01 else "MEDIUM",
+                    "confidence": "medium",
+                    "summary": (
+                        f"Mean nearest-award distance {r['observed_mean_days']} days "
+                        f"(null {r['null_mean_days']} days). p={r['p_value']}, "
+                        f"effect size {r['effect_size_sd']} SD. "
+                        f"{r['n_donations']} donations, {r['n_award_dates']} awards."
+                    ),
+                    "evidence": [
+                        {
+                            "source": "timing.json",
+                            "row": None,
+                            "fields": r,
+                        }
+                    ],
+                    "sources": ["timing.json"],
+                }
+            )
+            next_id += 1
+
+    # Sort: severity → confidence → id.
+    findings.sort(
+        key=lambda f: (
+            SEVERITY_ORDER.get(f["severity"], 3),
+            CONFIDENCE_ORDER.get(f["confidence"], 3),
+            f["id"],
+        )
+    )
+
+    payload = {
+        "metadata": {
+            "n_findings": len(findings),
+            "cross_links_path": cross_links_path,
+            "timing_path": timing_path,
+            "bundled_threshold": bundled_threshold,
+        },
+        "findings": findings,
+    }
+    Path(out_path).write_text(json.dumps(payload, indent=2))
+    return payload
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--cross-links", required=True)
+    p.add_argument("--timing", help="Optional timing.json from timing_analysis.py")
+    p.add_argument("--out", default="findings.json")
+    p.add_argument(
+        "--bundled-threshold",
+        type=int,
+        default=3,
+        help="Minimum distinct left entities to flag as bundled (default 3)",
+    )
+    a = p.parse_args()
+
+    payload = build_findings(
+        cross_links_path=a.cross_links,
+        timing_path=a.timing,
+        out_path=a.out,
+        bundled_threshold=a.bundled_threshold,
+    )
+    print(f"Wrote {payload['metadata']['n_findings']} findings to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/entity_resolution.py b/optional-skills/research/osint-investigation/scripts/entity_resolution.py
new file mode 100644
index 00000000000..26d60d433d4
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/entity_resolution.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""Cross-source entity resolution (stdlib-only).
+
+Given two CSV files with name columns, find candidate matches using three
+tiers of normalization:
+
+  1. exact          — normalized strings equal
+  2. fuzzy          — sorted-token (word-bag) match
+  3. token_overlap  — >=60% Jaccard overlap on >=4-char tokens, >=2 shared
+
+Adapted from ShinMegamiBoson/OpenPlanter (MIT) but generalized: no Boston-
+specific record types, no contribution-code filters, no fixed schemas.
+
+Output CSV columns:
+    match_type, confidence, left_name, right_name,
+    left_normalized, right_normalized, left_row, right_row,
+    overlap_ratio, shared_tokens
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import sys
+from pathlib import Path
+
+# Allow running directly or as a module.
+sys.path.insert(0, str(Path(__file__).parent))
+from _normalize import (  # noqa: E402
+    normalize_name,
+    normalize_aggressive,
+    token_overlap_ratio,
+)
+
+CONFIDENCE = {
+    "exact": "high",
+    "fuzzy": "medium",
+    "token_overlap": "low",
+}
+
+
+def _read_csv(path: str, name_col: str) -> list[dict[str, str]]:
+    rows = []
+    with open(path, newline="", encoding="utf-8") as fh:
+        reader = csv.DictReader(fh)
+        if name_col not in (reader.fieldnames or []):
+            raise SystemExit(
+                f"Column {name_col!r} not in {path}. "
+                f"Available: {reader.fieldnames}"
+            )
+        for i, row in enumerate(reader):
+            row["__row__"] = str(i)
+            rows.append(row)
+    return rows
+
+
+def _build_index(rows: list[dict[str, str]], name_col: str):
+    """Index by exact-normalized and aggressive (sorted-token) form."""
+    exact: dict[str, list[dict[str, str]]] = {}
+    aggressive: dict[str, list[dict[str, str]]] = {}
+    for row in rows:
+        raw = row.get(name_col, "")
+        n = normalize_name(raw)
+        if n:
+            exact.setdefault(n, []).append(row)
+        a = normalize_aggressive(raw)
+        if a:
+            aggressive.setdefault(a, []).append(row)
+    return exact, aggressive
+
+
+def _emit(
+    out_rows: list[dict[str, str]],
+    seen: set[tuple],
+    match_type: str,
+    left_row: dict[str, str],
+    right_row: dict[str, str],
+    left_col: str,
+    right_col: str,
+    ratio: float = 0.0,
+    shared: int = 0,
+):
+    left_raw = left_row.get(left_col, "")
+    right_raw = right_row.get(right_col, "")
+    key = (
+        left_row["__row__"],
+        right_row["__row__"],
+        match_type,
+    )
+    if key in seen:
+        return
+    seen.add(key)
+    out_rows.append(
+        {
+            "match_type": match_type,
+            "confidence": CONFIDENCE[match_type],
+            "left_name": left_raw,
+            "right_name": right_raw,
+            "left_normalized": normalize_name(left_raw),
+            "right_normalized": normalize_name(right_raw),
+            "left_row": left_row["__row__"],
+            "right_row": right_row["__row__"],
+            "overlap_ratio": f"{ratio:.3f}" if ratio else "",
+            "shared_tokens": str(shared) if shared else "",
+        }
+    )
+
+
+def resolve(
+    left_path: str,
+    left_col: str,
+    right_path: str,
+    right_col: str,
+    out_path: str,
+    overlap_threshold: float = 0.60,
+    min_shared: int = 2,
+    skip_overlap: bool = False,
+) -> int:
+    left_rows = _read_csv(left_path, left_col)
+    right_rows = _read_csv(right_path, right_col)
+
+    right_exact, right_aggressive = _build_index(right_rows, right_col)
+
+    out_rows: list[dict[str, str]] = []
+    seen: set[tuple] = set()
+
+    # Pass 1+2: exact / fuzzy via index lookup.
+    for lrow in left_rows:
+        raw = lrow.get(left_col, "")
+        n = normalize_name(raw)
+        if not n:
+            continue
+        for rrow in right_exact.get(n, []):
+            _emit(out_rows, seen, "exact", lrow, rrow, left_col, right_col)
+        a = normalize_aggressive(raw)
+        if a:
+            for rrow in right_aggressive.get(a, []):
+                _emit(out_rows, seen, "fuzzy", lrow, rrow, left_col, right_col)
+
+    if not skip_overlap:
+        # Pass 3: token overlap (O(N*M) — expensive; allow opt-out).
+        for lrow in left_rows:
+            l_raw = lrow.get(left_col, "")
+            if not normalize_name(l_raw):
+                continue
+            for rrow in right_rows:
+                ratio, shared = token_overlap_ratio(
+                    l_raw, rrow.get(right_col, "")
+                )
+                if ratio >= overlap_threshold and shared >= min_shared:
+                    _emit(
+                        out_rows,
+                        seen,
+                        "token_overlap",
+                        lrow,
+                        rrow,
+                        left_col,
+                        right_col,
+                        ratio=ratio,
+                        shared=shared,
+                    )
+
+    fieldnames = [
+        "match_type",
+        "confidence",
+        "left_name",
+        "right_name",
+        "left_normalized",
+        "right_normalized",
+        "left_row",
+        "right_row",
+        "overlap_ratio",
+        "shared_tokens",
+    ]
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        writer = csv.DictWriter(fh, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(out_rows)
+    return len(out_rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--left", required=True, help="Left CSV path")
+    p.add_argument(
+        "--left-name-col", required=True, help="Name column in left CSV"
+    )
+    p.add_argument("--right", required=True, help="Right CSV path")
+    p.add_argument(
+        "--right-name-col",
+        required=True,
+        help="Name column in right CSV",
+    )
+    p.add_argument("--out", required=True, help="Output CSV path")
+    p.add_argument(
+        "--overlap-threshold",
+        type=float,
+        default=0.60,
+        help="Jaccard overlap threshold for token_overlap tier (default 0.60)",
+    )
+    p.add_argument(
+        "--min-shared",
+        type=int,
+        default=2,
+        help="Minimum shared tokens for token_overlap tier (default 2)",
+    )
+    p.add_argument(
+        "--skip-overlap",
+        action="store_true",
+        help="Skip the O(N*M) token_overlap pass (much faster on large CSVs)",
+    )
+    args = p.parse_args()
+
+    count = resolve(
+        left_path=args.left,
+        left_col=args.left_name_col,
+        right_path=args.right,
+        right_col=args.right_name_col,
+        out_path=args.out,
+        overlap_threshold=args.overlap_threshold,
+        min_shared=args.min_shared,
+        skip_overlap=args.skip_overlap,
+    )
+    print(f"Wrote {count} match rows to {args.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_courtlistener.py b/optional-skills/research/osint-investigation/scripts/fetch_courtlistener.py
new file mode 100644
index 00000000000..db5e715bf57
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_courtlistener.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+"""Search court records via CourtListener (Free Law Project).
+
+Covers ~10M federal and state court opinions, plus PACER docket data
+where available. Public REST API v4 supports anonymous read access for
+search; some endpoints require a token (free at courtlistener.com).
+
+Set COURTLISTENER_TOKEN to authenticate (raises rate limits).
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import os
+import sys
+import urllib.parse
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get_json  # noqa: E402
+
+BASE = "https://www.courtlistener.com/api/rest/v4/search/"
+
+COLUMNS = [
+    "case_name",
+    "court",
+    "court_id",
+    "date_filed",
+    "docket_number",
+    "judge",
+    "citation",
+    "result_type",
+    "snippet",
+    "absolute_url",
+]
+
+SEARCH_TYPES = {
+    "opinions": "o",       # Court opinions
+    "dockets": "r",        # PACER dockets (may require auth depending on coverage)
+    "oral": "oa",          # Oral arguments
+    "people": "p",         # Judges / people
+    "recap": "r",          # Same as dockets in v4
+}
+
+
+def fetch(
+    query: str,
+    search_type: str,
+    court: str | None,
+    date_from: str | None,
+    date_to: str | None,
+    token: str | None,
+    limit: int,
+    out_path: str,
+) -> int:
+    type_code = SEARCH_TYPES.get(search_type, search_type)
+    params = {
+        "q": query,
+        "type": type_code,
+    }
+    if court:
+        params["court"] = court
+    if date_from:
+        params["filed_after"] = date_from
+    if date_to:
+        params["filed_before"] = date_to
+    headers = {"Authorization": f"Token {token}"} if token else None
+
+    rows: list[dict[str, str]] = []
+    next_url: str | None = f"{BASE}?{urllib.parse.urlencode(params)}"
+    while next_url and len(rows) < limit:
+        try:
+            payload = get_json(next_url, headers=headers)
+        except Exception as e:  # noqa: BLE001
+            print(f"CourtListener error: {e}", file=sys.stderr)
+            break
+        if not isinstance(payload, dict):
+            break
+        results = payload.get("results", [])
+        for r in results:
+            if len(rows) >= limit:
+                break
+            rows.append(
+                {
+                    "case_name": r.get("caseName", "") or r.get("case_name", "") or "",
+                    "court": r.get("court", "") or "",
+                    "court_id": r.get("court_id", "") or "",
+                    "date_filed": (r.get("dateFiled", "") or r.get("date_filed", "") or "")[:10],
+                    "docket_number": r.get("docketNumber", "") or r.get("docket_number", "") or "",
+                    "judge": r.get("judge", "") or "",
+                    "citation": "; ".join(r.get("citation", []) or []) if isinstance(r.get("citation"), list) else (r.get("citation") or ""),
+                    "result_type": search_type,
+                    "snippet": (r.get("snippet", "") or "").replace("\n", " ")[:500],
+                    "absolute_url": (
+                        f"https://www.courtlistener.com{r.get('absolute_url', '')}"
+                        if r.get("absolute_url", "").startswith("/")
+                        else r.get("absolute_url", "")
+                    ),
+                }
+            )
+        next_url = payload.get("next")
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    if not rows:
+        print(
+            f"CourtListener: 0 results for type={search_type!r} q={query!r}. "
+            "Most private individuals don't appear in published court records "
+            "unless they were party to a federal or state appellate case.",
+            file=sys.stderr,
+        )
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--query", required=True, help="Search query (party name, case name, keyword)")
+    p.add_argument(
+        "--type",
+        default="opinions",
+        choices=list(SEARCH_TYPES.keys()),
+        help="Search type (default: opinions)",
+    )
+    p.add_argument("--court", help="Court ID filter (e.g. 'nysd' = SDNY, 'scotus' = Supreme Court)")
+    p.add_argument("--date-from", help="Filed-after date YYYY-MM-DD")
+    p.add_argument("--date-to", help="Filed-before date YYYY-MM-DD")
+    p.add_argument("--token", default=os.environ.get("COURTLISTENER_TOKEN"))
+    p.add_argument("--limit", type=int, default=100)
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    n = fetch(
+        query=a.query,
+        search_type=a.type,
+        court=a.court,
+        date_from=a.date_from,
+        date_to=a.date_to,
+        token=a.token,
+        limit=a.limit,
+        out_path=a.out,
+    )
+    print(f"Wrote {n} CourtListener rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_gdelt.py b/optional-skills/research/osint-investigation/scripts/fetch_gdelt.py
new file mode 100644
index 00000000000..fa98dabc9bb
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_gdelt.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""Search the GDELT 2.0 DOC API for news mentions.
+
+GDELT monitors world news in 100+ languages and indexes the full text.
+Free, anonymous, ~15-minute update frequency. Covers ~2015→present.
+
+Useful for surfacing news mentions of a person, company, or topic across
+international media — much wider net than Google News.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import sys
+import time
+import urllib.parse
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get_json  # noqa: E402
+
+BASE = "https://api.gdeltproject.org/api/v2/doc/doc"
+
+COLUMNS = [
+    "title",
+    "url",
+    "seen_date",
+    "domain",
+    "language",
+    "source_country",
+    "tone",
+    "social_image",
+]
+
+
+def fetch(
+    query: str,
+    mode: str,
+    timespan: str | None,
+    start_datetime: str | None,
+    end_datetime: str | None,
+    source_country: str | None,
+    source_lang: str | None,
+    limit: int,
+    out_path: str,
+) -> int:
+    params: dict[str, str] = {
+        "query": query,
+        "mode": mode,
+        "format": "json",
+        "maxrecords": str(min(limit, 250)),
+        "sort": "datedesc",
+    }
+    if timespan:
+        params["timespan"] = timespan
+    if start_datetime:
+        params["startdatetime"] = start_datetime.replace("-", "").replace(":", "").replace(" ", "")
+    if end_datetime:
+        params["enddatetime"] = end_datetime.replace("-", "").replace(":", "").replace(" ", "")
+    if source_country:
+        params["sourcecountry"] = source_country
+    if source_lang:
+        params["sourcelang"] = source_lang
+
+    url = f"{BASE}?{urllib.parse.urlencode(params)}"
+    payload: dict | list = {}
+    for attempt in range(3):
+        try:
+            payload = get_json(url)
+            break
+        except RuntimeError as e:
+            # GDELT requires 1 request per 5 seconds; back off and retry.
+            if "429" in str(e) and attempt < 2:
+                print(
+                    f"GDELT throttle hit; sleeping 6s before retry "
+                    f"(attempt {attempt + 1}/3)",
+                    file=sys.stderr,
+                )
+                time.sleep(6)
+                continue
+            print(f"GDELT error: {e}", file=sys.stderr)
+            payload = {}
+            break
+        except Exception as e:  # noqa: BLE001
+            print(f"GDELT error: {e}", file=sys.stderr)
+            payload = {}
+            break
+
+    rows: list[dict[str, str]] = []
+    if isinstance(payload, dict):
+        articles = payload.get("articles", []) or []
+        for a in articles[:limit]:
+            seen = (a.get("seendate") or "")
+            # GDELT format: 20260319T083000Z → 2026-03-19 08:30:00Z
+            if len(seen) == 16 and "T" in seen:
+                seen = f"{seen[0:4]}-{seen[4:6]}-{seen[6:8]} {seen[9:11]}:{seen[11:13]}:{seen[13:15]}Z"
+            rows.append(
+                {
+                    "title": (a.get("title") or "").replace("\n", " ").strip(),
+                    "url": a.get("url") or "",
+                    "seen_date": seen,
+                    "domain": a.get("domain") or "",
+                    "language": a.get("language") or "",
+                    "source_country": a.get("sourcecountry") or "",
+                    "tone": str(a.get("tone") or ""),
+                    "social_image": a.get("socialimage") or "",
+                }
+            )
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    if not rows:
+        print(
+            f"GDELT: 0 articles for query={query!r}. "
+            "GDELT indexes ~2015→present. Try widening the timespan or "
+            "checking the query syntax (https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/).",
+            file=sys.stderr,
+        )
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--query", required=True, help='Search query (supports GDELT operators: quoted phrases, AND/OR/NOT, sourcecountry:, theme:)')
+    p.add_argument(
+        "--mode",
+        default="ArtList",
+        choices=["ArtList", "ImageCollage", "TimelineVol", "TimelineTone", "ToneChart"],
+        help="GDELT mode (default ArtList for article list)",
+    )
+    p.add_argument(
+        "--timespan",
+        help="Relative window: e.g. '1d', '1w', '1m', '3m', '1y' (overrides start/end)",
+    )
+    p.add_argument("--start", help="Absolute start YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS")
+    p.add_argument("--end", help="Absolute end YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS")
+    p.add_argument("--source-country", help="2-letter source country (e.g. US, UK)")
+    p.add_argument("--source-lang", help="Source language (e.g. English, Spanish)")
+    p.add_argument("--limit", type=int, default=100)
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    n = fetch(
+        query=a.query,
+        mode=a.mode,
+        timespan=a.timespan,
+        start_datetime=a.start,
+        end_datetime=a.end,
+        source_country=a.source_country,
+        source_lang=a.source_lang,
+        limit=a.limit,
+        out_path=a.out,
+    )
+    print(f"Wrote {n} GDELT article rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_icij_offshore.py b/optional-skills/research/osint-investigation/scripts/fetch_icij_offshore.py
new file mode 100644
index 00000000000..8d050b62bf1
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_icij_offshore.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+"""Search ICIJ Offshore Leaks via the bulk CSV database.
+
+The old reconcile endpoint (https://offshoreleaks.icij.org/reconcile) returns
+404 — ICIJ has removed it. The remaining stable access path is the public
+bulk download:
+
+    https://offshoreleaks-data.icij.org/offshoreleaks/csv/full-oldb.LATEST.zip
+
+~70 MB, ~6 CSVs inside (nodes-entities, nodes-officers, nodes-intermediaries,
+nodes-addresses, relationships, ...). We cache it under
+$HERMES_OSINT_CACHE/icij/ (default: ~/.cache/hermes-osint/icij/) and search
+locally so the agent doesn't re-download for every query.
+
+Output CSV columns match the original `fetch_icij_offshore.py` contract.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import io
+import os
+import re
+import sys
+import time
+import urllib.request
+import zipfile
+from pathlib import Path
+
+BULK_URL = "https://offshoreleaks-data.icij.org/offshoreleaks/csv/full-oldb.LATEST.zip"
+
+COLUMNS = [
+    "node_id",
+    "name",
+    "node_type",
+    "country_codes",
+    "countries",
+    "jurisdiction",
+    "incorporation_date",
+    "inactivation_date",
+    "source",
+    "entity_url",
+    "connections",
+]
+
+
+def _cache_dir() -> Path:
+    base = os.environ.get("HERMES_OSINT_CACHE")
+    if base:
+        return Path(base) / "icij"
+    return Path.home() / ".cache" / "hermes-osint" / "icij"
+
+
+def _download(dest: Path, force: bool = False) -> Path:
+    """Download (or reuse cached) ICIJ bulk ZIP."""
+    dest.mkdir(parents=True, exist_ok=True)
+    zip_path = dest / "full-oldb.zip"
+    if zip_path.exists() and not force:
+        # Re-check age: refetch if older than 30 days.
+        age_days = (time.time() - zip_path.stat().st_mtime) / 86400
+        if age_days < 30:
+            return zip_path
+    print(f"Downloading ICIJ bulk database (~70 MB) to {zip_path}", file=sys.stderr)
+    req = urllib.request.Request(
+        BULK_URL,
+        headers={"User-Agent": "hermes-agent osint-investigation skill"},
+    )
+    with urllib.request.urlopen(req, timeout=120) as resp:  # noqa: S310
+        tmp = zip_path.with_suffix(".zip.tmp")
+        with open(tmp, "wb") as fh:
+            while True:
+                chunk = resp.read(1 << 16)
+                if not chunk:
+                    break
+                fh.write(chunk)
+    tmp.replace(zip_path)
+    return zip_path
+
+
+def _open_csv(zf: zipfile.ZipFile, name_pattern: str):
+    """Open the first CSV matching name_pattern (case-insensitive substring)."""
+    for info in zf.infolist():
+        if name_pattern.lower() in info.filename.lower() and info.filename.lower().endswith(".csv"):
+            return zf.open(info), info.filename
+    return None, None
+
+
+def _match(needle_norm: str, hay: str) -> bool:
+    return needle_norm in (hay or "").upper()
+
+
+def _normalize_query(s: str) -> str:
+    s = s.upper()
+    s = re.sub(r"[^\w\s]", " ", s)
+    s = re.sub(r"\s+", " ", s).strip()
+    return s
+
+
+def fetch(
+    entity: str | None,
+    officer: str | None,
+    jurisdiction: str | None,
+    out_path: str,
+    cache_dir: Path,
+    force_refresh: bool = False,
+    limit: int = 500,
+) -> int:
+    zip_path = _download(cache_dir, force=force_refresh)
+    rows: list[dict[str, str]] = []
+    needles: list[tuple[str, str]] = []  # (kind, normalized needle)
+    if entity:
+        needles.append(("Entity", _normalize_query(entity)))
+    if officer:
+        needles.append(("Officer", _normalize_query(officer)))
+    jur_norm = _normalize_query(jurisdiction) if jurisdiction else None
+
+    targets = [
+        ("Entity", "nodes-entities"),
+        ("Officer", "nodes-officers"),
+        ("Intermediary", "nodes-intermediaries"),
+    ]
+
+    with zipfile.ZipFile(zip_path) as zf:
+        for node_type, csv_substring in targets:
+            relevant_needles = [n for (k, n) in needles if k in (node_type, "Entity", "Officer")] or []
+            # Only scan a CSV if we have a needle that could plausibly match it,
+            # or if we have ONLY a jurisdiction filter.
+            applicable_needles = [n for (k, n) in needles if k == node_type]
+            if needles and not applicable_needles and not jur_norm:
+                continue
+            stream, fname = _open_csv(zf, csv_substring)
+            if not stream:
+                continue
+            with stream:
+                text = io.TextIOWrapper(stream, encoding="utf-8", errors="replace")
+                reader = csv.DictReader(text)
+                for row in reader:
+                    name = (row.get("name") or "").strip()
+                    if not name:
+                        continue
+                    name_u = name.upper()
+                    matched = False
+                    for n in applicable_needles or relevant_needles:
+                        if _match(n, name_u):
+                            matched = True
+                            break
+                    if not needles:
+                        matched = True  # jurisdiction-only sweep
+                    if not matched:
+                        continue
+                    jur = (row.get("jurisdiction_description") or row.get("country_codes") or "").strip()
+                    if jur_norm and jur_norm not in jur.upper() and jur_norm not in (row.get("countries") or "").upper():
+                        continue
+                    node_id = (row.get("node_id") or "").strip()
+                    rows.append(
+                        {
+                            "node_id": node_id,
+                            "name": name,
+                            "node_type": node_type,
+                            "country_codes": row.get("country_codes", "") or "",
+                            "countries": row.get("countries", "") or "",
+                            "jurisdiction": jur,
+                            "incorporation_date": row.get("incorporation_date", "") or "",
+                            "inactivation_date": row.get("inactivation_date", "") or "",
+                            "source": row.get("sourceID", "") or row.get("source", "") or "",
+                            "entity_url": (
+                                f"https://offshoreleaks.icij.org/nodes/{node_id}" if node_id else ""
+                            ),
+                            "connections": "",
+                        }
+                    )
+                    if len(rows) >= limit:
+                        break
+            if len(rows) >= limit:
+                break
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    if not rows:
+        bits = []
+        if entity:
+            bits.append(f"entity={entity!r}")
+        if officer:
+            bits.append(f"officer={officer!r}")
+        if jurisdiction:
+            bits.append(f"jurisdiction={jurisdiction!r}")
+        print(
+            f"ICIJ: 0 matches for {', '.join(bits)}. "
+            "The bulk database covers offshore leaks (Panama, Paradise, Pandora, "
+            "Bahamas, Offshore Leaks). Most private US individuals are NOT in it.",
+            file=sys.stderr,
+        )
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--entity", help="Search by entity name (substring, case-insensitive)")
+    p.add_argument("--officer", help="Search by officer / individual name (substring, case-insensitive)")
+    p.add_argument("--jurisdiction", help="Filter results by jurisdiction substring")
+    p.add_argument("--limit", type=int, default=500)
+    p.add_argument("--out", required=True)
+    p.add_argument(
+        "--cache-dir",
+        type=Path,
+        default=None,
+        help="Override cache directory (default: $HERMES_OSINT_CACHE/icij or ~/.cache/hermes-osint/icij)",
+    )
+    p.add_argument(
+        "--force-refresh",
+        action="store_true",
+        help="Re-download the bulk ZIP even if a recent cached copy exists.",
+    )
+    a = p.parse_args()
+    if not (a.entity or a.officer or a.jurisdiction):
+        p.error("must supply at least one of --entity / --officer / --jurisdiction")
+    n = fetch(
+        entity=a.entity,
+        officer=a.officer,
+        jurisdiction=a.jurisdiction,
+        out_path=a.out,
+        cache_dir=a.cache_dir or _cache_dir(),
+        force_refresh=a.force_refresh,
+        limit=a.limit,
+    )
+    print(f"Wrote {n} ICIJ Offshore Leaks rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_nyc_acris.py b/optional-skills/research/osint-investigation/scripts/fetch_nyc_acris.py
new file mode 100644
index 00000000000..6ec448f0f53
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_nyc_acris.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""Search NYC property records via ACRIS (Automated City Register Information System).
+
+Uses the city's Socrata-backed open data API. No auth required for read access.
+
+Datasets:
+  bnx9-e6tj — Real Property Master (one row per recorded document)
+  636b-3b5g — Real Property Parties (names — grantor, grantee, etc.)
+  8h5j-fqxa — Real Property Legal (lot / property identifiers)
+  uqqa-hym2 — Real Property References
+
+The Parties dataset has the names. We search by name and optionally join to
+Master to get the doc type and date.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import sys
+import urllib.parse
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get_json  # noqa: E402
+
+PARTIES_URL = "https://data.cityofnewyork.us/resource/636b-3b5g.json"
+MASTER_URL = "https://data.cityofnewyork.us/resource/bnx9-e6tj.json"
+
+PARTY_TYPE = {
+    "1": "grantor (seller / mortgagor / debtor)",
+    "2": "grantee (buyer / mortgagee / creditor)",
+    "3": "other party",
+}
+
+BOROUGH = {
+    "1": "Manhattan",
+    "2": "Bronx",
+    "3": "Brooklyn",
+    "4": "Queens",
+    "5": "Staten Island",
+}
+
+COLUMNS = [
+    "document_id",
+    "name",
+    "party_type",
+    "party_role",
+    "address_1",
+    "address_2",
+    "city",
+    "state",
+    "zip",
+    "country",
+    "doc_type",
+    "doc_date",
+    "recorded_date",
+    "borough",
+    "amount",
+    "filing_url",
+]
+
+
+def _filing_url(document_id: str) -> str:
+    if not document_id:
+        return ""
+    return (
+        f"https://a836-acris.nyc.gov/DS/DocumentSearch/DocumentImageView?doc_id={document_id}"
+    )
+
+
+def fetch(
+    name: str | None,
+    address: str | None,
+    party_type: str | None,
+    limit: int,
+    out_path: str,
+    enrich: bool = True,
+) -> int:
+    if not (name or address):
+        raise SystemExit("must supply --name or --address")
+
+    where_clauses: list[str] = []
+    if name:
+        safe = name.upper().replace("'", "''")
+        where_clauses.append(f"upper(name) like '%{safe}%'")
+    if address:
+        safe_addr = address.upper().replace("'", "''")
+        where_clauses.append(f"upper(address_1) like '%{safe_addr}%'")
+    if party_type and party_type in {"1", "2", "3"}:
+        where_clauses.append(f"party_type='{party_type}'")
+
+    params = {
+        "$where": " AND ".join(where_clauses),
+        "$limit": str(limit),
+    }
+    url = f"{PARTIES_URL}?{urllib.parse.urlencode(params)}"
+    parties = get_json(url)
+    if not isinstance(parties, list):
+        raise SystemExit(f"Unexpected ACRIS response: {parties!r}")
+
+    # Enrich with master record (doc_type, dates, borough, amount).
+    doc_ids: list[str] = sorted({
+        d for d in (p.get("document_id") for p in parties) if d
+    })
+    masters: dict[str, dict] = {}
+    if enrich and doc_ids:
+        # Batch up to 100 doc_ids per request (Socrata IN-list is fine for this).
+        for i in range(0, len(doc_ids), 100):
+            chunk = doc_ids[i : i + 100]
+            id_list = ",".join(f"'{d}'" for d in chunk)
+            master_params = {
+                "$where": f"document_id in ({id_list})",
+                "$limit": "100",
+            }
+            url = f"{MASTER_URL}?{urllib.parse.urlencode(master_params)}"
+            try:
+                rows = get_json(url)
+            except Exception as e:  # noqa: BLE001
+                print(f"ACRIS master lookup failed for chunk: {e}", file=sys.stderr)
+                continue
+            if isinstance(rows, list):
+                for r in rows:
+                    did = r.get("document_id", "")
+                    if did:
+                        masters[did] = r
+
+    out_rows: list[dict[str, str]] = []
+    for p in parties:
+        did = p.get("document_id", "") or ""
+        m = masters.get(did, {})
+        out_rows.append(
+            {
+                "document_id": did,
+                "name": p.get("name", "") or "",
+                "party_type": p.get("party_type", "") or "",
+                "party_role": PARTY_TYPE.get(p.get("party_type", ""), ""),
+                "address_1": p.get("address_1", "") or "",
+                "address_2": p.get("address_2", "") or "",
+                "city": p.get("city", "") or "",
+                "state": p.get("state", "") or "",
+                "zip": p.get("zip", "") or "",
+                "country": p.get("country", "") or "",
+                "doc_type": m.get("doc_type", "") or "",
+                "doc_date": (m.get("document_date", "") or "")[:10],
+                "recorded_date": (m.get("recorded_datetime", "") or "")[:10],
+                "borough": BOROUGH.get(m.get("recorded_borough", ""), m.get("recorded_borough", "")),
+                "amount": m.get("document_amt", "") or "",
+                "filing_url": _filing_url(did),
+            }
+        )
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(out_rows)
+
+    if not out_rows:
+        filters = []
+        if name:
+            filters.append(f"name={name!r}")
+        if address:
+            filters.append(f"address={address!r}")
+        print(
+            f"NYC ACRIS: 0 records for {', '.join(filters)}. "
+            "ACRIS covers ONLY NYC (5 boroughs). For property records elsewhere, "
+            "search the relevant county recorder directly.",
+            file=sys.stderr,
+        )
+    return len(out_rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--name", help="Party name substring (case-insensitive)")
+    p.add_argument("--address", help="Address line 1 substring")
+    p.add_argument(
+        "--party-type",
+        choices=["1", "2", "3"],
+        help="Filter party type: 1=grantor (seller/mortgagor), 2=grantee (buyer/mortgagee), 3=other",
+    )
+    p.add_argument("--limit", type=int, default=200)
+    p.add_argument(
+        "--no-enrich",
+        action="store_true",
+        help="Skip the master-document lookup that adds doc_type/date/amount",
+    )
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    n = fetch(
+        name=a.name,
+        address=a.address,
+        party_type=a.party_type,
+        limit=a.limit,
+        out_path=a.out,
+        enrich=not a.no_enrich,
+    )
+    print(f"Wrote {n} NYC ACRIS rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_ofac_sdn.py b/optional-skills/research/osint-investigation/scripts/fetch_ofac_sdn.py
new file mode 100644
index 00000000000..5233fa09ab8
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_ofac_sdn.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""Fetch OFAC SDN list (CSV format) and normalize.
+
+Public endpoint: https://www.treasury.gov/ofac/downloads/sdn.csv
+Format reference: https://ofac.treasury.gov/specially-designated-nationals-and-blocked-persons-list-sdn-human-readable-lists
+
+The SDN CSV uses a specific 12-column format with no header row:
+    ent_num, sdn_name, sdn_type, program, title, call_sign, vess_type,
+    tonnage, grt, vess_flag, vess_owner, remarks
+Address and AKA records live in separate files. We fetch all three and join.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import io
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get  # noqa: E402
+
+SDN_URL = "https://www.treasury.gov/ofac/downloads/sdn.csv"
+ADD_URL = "https://www.treasury.gov/ofac/downloads/add.csv"
+ALT_URL = "https://www.treasury.gov/ofac/downloads/alt.csv"
+
+SDN_COLS = [
+    "ent_num", "sdn_name", "sdn_type", "program", "title",
+    "call_sign", "vess_type", "tonnage", "grt", "vess_flag",
+    "vess_owner", "remarks",
+]
+ADD_COLS = [
+    "ent_num", "add_num", "address", "city_state_zip", "country", "add_remarks",
+]
+ALT_COLS = [
+    "ent_num", "alt_num", "alt_type", "alt_name", "alt_remarks",
+]
+
+COLUMNS = [
+    "entity_id",
+    "name",
+    "entity_type",
+    "program_list",
+    "title",
+    "nationalities",
+    "aka_list",
+    "addresses",
+    "dob",
+    "pob",
+    "remarks",
+    "last_updated",
+]
+
+_TYPE_MAP = {
+    "individual": "individual",
+    "entity": "entity",
+    "vessel": "vessel",
+    "aircraft": "aircraft",
+}
+
+
+def _read_csv(url: str, columns: list[str]) -> list[dict[str, str]]:
+    body = get(url, timeout=60).decode("latin-1", errors="replace")
+    reader = csv.reader(io.StringIO(body))
+    out = []
+    for row in reader:
+        if not row:
+            continue
+        # Pad/truncate to expected width.
+        row = row[: len(columns)] + [""] * (len(columns) - len(row))
+        out.append(dict(zip(columns, row)))
+    return out
+
+
+def _strip_quotes(s: str) -> str:
+    s = s.strip()
+    if s.startswith('"') and s.endswith('"'):
+        s = s[1:-1]
+    if s == "-0-":
+        return ""
+    return s
+
+
+def fetch(
+    program: str | None,
+    entity_type: str | None,
+    out_path: str,
+) -> int:
+    sdn = _read_csv(SDN_URL, SDN_COLS)
+    addresses = _read_csv(ADD_URL, ADD_COLS)
+    akas = _read_csv(ALT_URL, ALT_COLS)
+
+    addr_by_ent: dict[str, list[str]] = defaultdict(list)
+    for a in addresses:
+        ent = _strip_quotes(a["ent_num"])
+        parts = [
+            _strip_quotes(a[c])
+            for c in ("address", "city_state_zip", "country")
+            if _strip_quotes(a[c])
+        ]
+        if parts:
+            addr_by_ent[ent].append(", ".join(parts))
+
+    aka_by_ent: dict[str, list[str]] = defaultdict(list)
+    for k in akas:
+        ent = _strip_quotes(k["ent_num"])
+        name = _strip_quotes(k["alt_name"])
+        if name:
+            aka_by_ent[ent].append(name)
+
+    rows: list[dict[str, str]] = []
+    for r in sdn:
+        ent_num = _strip_quotes(r["ent_num"])
+        if not ent_num:
+            continue
+        sdn_type = _TYPE_MAP.get(_strip_quotes(r["sdn_type"]).lower(), _strip_quotes(r["sdn_type"]))
+        if entity_type and sdn_type != entity_type:
+            continue
+        progs = _strip_quotes(r["program"])
+        if program and program.upper() not in progs.upper().split(";"):
+            continue
+        remarks = _strip_quotes(r["remarks"])
+        # DOB / POB are commonly embedded in remarks for individuals.
+        dob = ""
+        pob = ""
+        if sdn_type == "individual" and remarks:
+            for chunk in remarks.split(";"):
+                ch = chunk.strip()
+                if ch.upper().startswith("DOB"):
+                    dob = ch.split(maxsplit=1)[1] if " " in ch else ""
+                elif ch.upper().startswith("POB"):
+                    pob = ch.split(maxsplit=1)[1] if " " in ch else ""
+        rows.append(
+            {
+                "entity_id": ent_num,
+                "name": _strip_quotes(r["sdn_name"]),
+                "entity_type": sdn_type,
+                "program_list": "; ".join(p.strip() for p in progs.split(";") if p.strip()),
+                "title": _strip_quotes(r["title"]),
+                "nationalities": "",  # not in this CSV; available in XML format
+                "aka_list": "; ".join(aka_by_ent.get(ent_num, [])),
+                "addresses": "; ".join(addr_by_ent.get(ent_num, [])),
+                "dob": dob,
+                "pob": pob,
+                "remarks": remarks,
+                "last_updated": "",
+            }
+        )
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument("--program", help="Filter to specific sanctions program (e.g. SDGT, IRAN)")
+    p.add_argument(
+        "--entity-type",
+        choices=["individual", "entity", "vessel", "aircraft"],
+        help="Filter to a specific entity type",
+    )
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    n = fetch(program=a.program, entity_type=a.entity_type, out_path=a.out)
+    print(f"Wrote {n} OFAC SDN rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py b/optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py
new file mode 100644
index 00000000000..6924a8056a6
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""Search OpenCorporates company registry data.
+
+OpenCorporates aggregates ~200M companies from 130+ jurisdictions. The
+public API requires an API token (free tier: 500 calls/month). Set
+OPENCORPORATES_API_TOKEN in env or pass --token.
+
+Without a token, this script falls back to scraping the public HTML
+search page (limited fields, more brittle, no jurisdiction filter).
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import os
+import re
+import sys
+import urllib.parse
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get, get_json  # noqa: E402
+
+API_URL = "https://api.opencorporates.com/v0.4/companies/search"
+HTML_URL = "https://opencorporates.com/companies"
+
+COLUMNS = [
+    "name",
+    "company_number",
+    "jurisdiction_code",
+    "jurisdiction_name",
+    "incorporation_date",
+    "dissolution_date",
+    "company_type",
+    "status",
+    "registered_address",
+    "opencorporates_url",
+    "officers_count",
+    "source",
+]
+
+
+def _via_api(query: str, jurisdiction: str | None, token: str, limit: int) -> list[dict]:
+    params = {
+        "q": query,
+        "api_token": token,
+        "per_page": str(min(limit, 100)),
+    }
+    if jurisdiction:
+        params["jurisdiction_code"] = jurisdiction
+    url = f"{API_URL}?{urllib.parse.urlencode(params)}"
+    payload = get_json(url)
+    if not isinstance(payload, dict):
+        return []
+    results = payload.get("results", {}).get("companies", []) or []
+    return [r.get("company", {}) for r in results if isinstance(r, dict)]
+
+
+def _via_html(query: str, limit: int) -> list[dict]:
+    """Best-effort HTML fallback when no API token is available."""
+    params = {"q": query, "utf8": "✓"}
+    url = f"{HTML_URL}?{urllib.parse.urlencode(params)}"
+    body = get(url, user_agent="Mozilla/5.0 hermes-osint").decode("utf-8", errors="replace")
+    # Each result is in <li class="company"> ... </li> with name, url, status
+    pattern = re.compile(
+        r'<li[^>]*class="[^"]*company[^"]*"[^>]*>.*?'
+        r'<a[^>]+href="(?P<url>/companies/[^"]+)"[^>]*>(?P<name>[^<]+)</a>'
+        r'(?:.*?<span[^>]*class="[^"]*jurisdiction[^"]*"[^>]*>(?P<jur>[^<]+)</span>)?'
+        r"(?:.*?<dt[^>]*>(?:Company\s+Number|Number)</dt>\s*<dd[^>]*>(?P<num>[^<]+)</dd>)?",
+        re.DOTALL | re.IGNORECASE,
+    )
+    out = []
+    for m in pattern.finditer(body):
+        if len(out) >= limit:
+            break
+        url_path = m.group("url").strip()
+        out.append(
+            {
+                "name": (m.group("name") or "").strip(),
+                "opencorporates_url": f"https://opencorporates.com{url_path}",
+                "jurisdiction_code": (m.group("jur") or "").strip(),
+                "company_number": (m.group("num") or "").strip(),
+                "_via": "html",
+            }
+        )
+    return out
+
+
+def fetch(
+    query: str,
+    jurisdiction: str | None,
+    token: str | None,
+    limit: int,
+    out_path: str,
+) -> int:
+    if token:
+        try:
+            companies = _via_api(query, jurisdiction, token, limit)
+            source_tag = "api"
+        except Exception as e:  # noqa: BLE001
+            print(
+                f"OpenCorporates API call failed ({e}); falling back to HTML.",
+                file=sys.stderr,
+            )
+            companies = _via_html(query, limit)
+            source_tag = "html-fallback"
+    else:
+        print(
+            "OPENCORPORATES_API_TOKEN not set — using HTML fallback (limited fields). "
+            "Get a free token at https://opencorporates.com/api_accounts/new",
+            file=sys.stderr,
+        )
+        companies = _via_html(query, limit)
+        source_tag = "html"
+
+    rows: list[dict[str, str]] = []
+    for c in companies[:limit]:
+        if c.get("_via") == "html":
+            rows.append(
+                {
+                    "name": c.get("name", ""),
+                    "company_number": c.get("company_number", ""),
+                    "jurisdiction_code": c.get("jurisdiction_code", ""),
+                    "jurisdiction_name": "",
+                    "incorporation_date": "",
+                    "dissolution_date": "",
+                    "company_type": "",
+                    "status": "",
+                    "registered_address": "",
+                    "opencorporates_url": c.get("opencorporates_url", ""),
+                    "officers_count": "",
+                    "source": source_tag,
+                }
+            )
+            continue
+        addr = c.get("registered_address_in_full") or ""
+        rows.append(
+            {
+                "name": c.get("name", "") or "",
+                "company_number": c.get("company_number", "") or "",
+                "jurisdiction_code": c.get("jurisdiction_code", "") or "",
+                "jurisdiction_name": "",
+                "incorporation_date": c.get("incorporation_date", "") or "",
+                "dissolution_date": c.get("dissolution_date", "") or "",
+                "company_type": c.get("company_type", "") or "",
+                "status": c.get("current_status", "") or c.get("inactive", "") or "",
+                "registered_address": addr,
+                "opencorporates_url": c.get("opencorporates_url", "") or "",
+                "officers_count": str(c.get("officers", {}).get("total_count", "") if c.get("officers") else ""),
+                "source": source_tag,
+            }
+        )
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    if not rows:
+        print(
+            f"OpenCorporates: 0 matches for query={query!r}"
+            f"{f' jurisdiction={jurisdiction!r}' if jurisdiction else ''}.",
+            file=sys.stderr,
+        )
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--query", required=True, help="Company name search")
+    p.add_argument(
+        "--jurisdiction",
+        help="Jurisdiction code, e.g. 'us_ny', 'us_de', 'gb', 'sg' (lowercased OpenCorporates style)",
+    )
+    p.add_argument("--limit", type=int, default=50)
+    p.add_argument("--token", default=os.environ.get("OPENCORPORATES_API_TOKEN"))
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    n = fetch(
+        query=a.query,
+        jurisdiction=a.jurisdiction,
+        token=a.token,
+        limit=a.limit,
+        out_path=a.out,
+    )
+    print(f"Wrote {n} OpenCorporates rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_sec_edgar.py b/optional-skills/research/osint-investigation/scripts/fetch_sec_edgar.py
new file mode 100644
index 00000000000..bd2fda8feb9
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_sec_edgar.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+"""Fetch SEC EDGAR filings index for a given CIK or company name.
+
+SEC requires a User-Agent header with contact info. Set SEC_USER_AGENT,
+e.g. SEC_USER_AGENT="Research example@example.com".
+
+Filings JSON is published at:
+    https://data.sec.gov/submissions/CIK<10-digit-padded>.json
+
+Company lookup uses:
+    https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&company=<name>&output=atom
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import os
+import re
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get, get_json  # noqa: E402
+
+SUBMISSIONS_URL = "https://data.sec.gov/submissions/CIK{cik}.json"
+COLUMNS = [
+    "cik",
+    "company_name",
+    "form_type",
+    "filing_date",
+    "accession_number",
+    "primary_document",
+    "filing_url",
+    "reporting_period",
+]
+
+
+def _ua() -> str:
+    ua = os.environ.get("SEC_USER_AGENT", "").strip()
+    if not ua:
+        raise SystemExit(
+            "SEC requires a User-Agent with contact info. "
+            "Set SEC_USER_AGENT='Your Name your@email'."
+        )
+    return ua
+
+
+def _resolve_cik(company: str) -> tuple[str, str]:
+    """Resolve a company name to a CIK via EDGAR's atom feed.
+
+    Returns (cik, resolved_company_name). The feed entries also reveal whether
+    the match is an individual filer (Form 3/4/5 only) — surfaced in the
+    return value so callers can warn.
+    """
+    url = "https://www.sec.gov/cgi-bin/browse-edgar"
+    params = {"action": "getcompany", "company": company, "output": "atom", "owner": "include"}
+    body = get(url, params=params, user_agent=_ua()).decode("utf-8", errors="replace")
+    m = re.search(r"CIK=(\d{10})", body)
+    if not m:
+        raise SystemExit(f"Could not resolve CIK for company={company!r}")
+    cik = m.group(1)
+    name_m = re.search(r"<title>([^<]+)\s*\((\d{10})\)</title>", body)
+    resolved = name_m.group(1).strip() if name_m else ""
+    return cik, resolved
+
+
+def fetch(
+    cik: str | None,
+    company: str | None,
+    types: list[str],
+    since: str | None,
+    out_path: str,
+) -> int:
+    resolved_name = ""
+    if not cik and company:
+        try:
+            cik, resolved_name = _resolve_cik(company)  # type: ignore[assignment]
+        except SystemExit as e:
+            # Write empty CSV with header so downstream tools still work,
+            # and tell the user clearly.
+            print(f"SEC EDGAR: {e}", file=sys.stderr)
+            Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+            with open(out_path, "w", newline="", encoding="utf-8") as fh:
+                csv.DictWriter(fh, fieldnames=COLUMNS).writeheader()
+            return 0
+        if resolved_name:
+            print(
+                f"Resolved company={company!r} → CIK {cik} ({resolved_name})",
+                file=sys.stderr,
+            )
+    if not cik:
+        raise SystemExit("must supply --cik or --company")
+    cik = cik.zfill(10)
+    url = SUBMISSIONS_URL.format(cik=cik)
+    payload = get_json(url, user_agent=_ua())
+    if not isinstance(payload, dict):
+        raise SystemExit(f"Unexpected EDGAR response shape for CIK {cik}")
+    name = payload.get("name", "")
+    recent = (payload.get("filings", {}) or {}).get("recent", {}) or {}
+    form = recent.get("form", [])
+    date = recent.get("filingDate", [])
+    accession = recent.get("accessionNumber", [])
+    primary_doc = recent.get("primaryDocument", [])
+    period = recent.get("reportDate", [])
+
+    # Histogram of available filing types — useful for surfacing why a filter
+    # returned 0 (e.g. user asked for 10-K on an individual Form 4 filer).
+    type_hist: dict[str, int] = {}
+    for ftype in form:
+        type_hist[ftype] = type_hist.get(ftype, 0) + 1
+
+    type_set = {t.strip().upper() for t in types} if types else None
+    rows: list[dict[str, str]] = []
+    for i, ftype in enumerate(form):
+        if type_set and ftype.upper() not in type_set:
+            continue
+        fdate = date[i] if i < len(date) else ""
+        if since and fdate and fdate < since:
+            continue
+        acc = accession[i] if i < len(accession) else ""
+        pdoc = primary_doc[i] if i < len(primary_doc) else ""
+        acc_nodash = acc.replace("-", "")
+        filing_url = (
+            f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{acc_nodash}/{pdoc}"
+            if acc and pdoc
+            else ""
+        )
+        rows.append(
+            {
+                "cik": cik,
+                "company_name": name,
+                "form_type": ftype,
+                "filing_date": fdate,
+                "accession_number": acc,
+                "primary_document": pdoc,
+                "filing_url": filing_url,
+                "reporting_period": period[i] if i < len(period) else "",
+            }
+        )
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+
+    if not rows and type_hist:
+        top = sorted(type_hist.items(), key=lambda kv: -kv[1])[:8]
+        hist_str = ", ".join(f"{t}={n}" for t, n in top)
+        print(
+            f"Warning: SEC EDGAR CIK {cik} ({name}) has {sum(type_hist.values())} "
+            f"recent filings but NONE match types={types}. "
+            f"Available form types: {hist_str}.",
+            file=sys.stderr,
+        )
+        # Insider-filer heuristic: only Form 3/4/5 → individual person, not a company.
+        company_types = {"10-K", "10-Q", "8-K", "20-F", "DEF 14A", "S-1"}
+        if not (set(type_hist.keys()) & company_types):
+            print(
+                f"Note: CIK {cik} appears to be an INDIVIDUAL filer "
+                f"(insider Form 3/4/5 only), not a corporate registrant. "
+                f"The resolver may have matched an officer/director named "
+                f"{company!r} rather than a company.",
+                file=sys.stderr,
+            )
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument("--cik", help="Central Index Key (will be 10-digit zero-padded)")
+    p.add_argument("--company", help="Resolve to CIK by company name")
+    p.add_argument("--types", default="", help="Comma-separated form types (e.g. 10-K,10-Q,8-K)")
+    p.add_argument("--since", help="Skip filings before YYYY-MM-DD")
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    types = [t for t in (a.types or "").split(",") if t.strip()]
+    n = fetch(cik=a.cik, company=a.company, types=types, since=a.since, out_path=a.out)
+    print(f"Wrote {n} EDGAR filing rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_senate_ld.py b/optional-skills/research/osint-investigation/scripts/fetch_senate_ld.py
new file mode 100644
index 00000000000..3119ff8a9a5
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_senate_ld.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""Fetch Senate Lobbying Disclosure (LD-1 / LD-2) filings.
+
+Anonymous: 120 req/hour. Token (SENATE_LDA_TOKEN): 1200 req/hour.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import os
+import sys
+import time
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get_json  # noqa: E402
+
+ENDPOINT = "https://lda.senate.gov/api/v1/filings/"
+COLUMNS = [
+    "filing_uuid",
+    "filing_type",
+    "filing_year",
+    "filing_period",
+    "registrant_name",
+    "registrant_id",
+    "client_name",
+    "client_id",
+    "client_general_description",
+    "income",
+    "expenses",
+    "lobbyists",
+    "issues",
+    "government_entities",
+    "filing_date",
+]
+
+
+def fetch(
+    client: str | None,
+    registrant: str | None,
+    year: int,
+    token: str | None,
+    out_path: str,
+    page_size: int = 100,
+    max_pages: int = 25,
+) -> int:
+    params: dict = {"filing_year": year, "page_size": page_size}
+    if client:
+        params["client_name"] = client
+    if registrant:
+        params["registrant_name"] = registrant
+
+    headers = {"Authorization": f"Token {token}"} if token else None
+    rows: list[dict[str, str]] = []
+    url = ENDPOINT
+    page = 0
+    while page < max_pages:
+        try:
+            payload = get_json(url, params=params if page == 0 else None, headers=headers)
+        except Exception as e:  # noqa: BLE001
+            print(f"Senate LDA error on page {page + 1}: {e}", file=sys.stderr)
+            break
+        if not isinstance(payload, dict):
+            break
+        results = payload.get("results", [])
+        for r in results:
+            client_obj = r.get("client") or {}
+            registrant_obj = r.get("registrant") or {}
+            lobbying_activities = r.get("lobbying_activities") or []
+            lobbyists = []
+            issues = []
+            entities = []
+            for la in lobbying_activities:
+                for lob in la.get("lobbyists") or []:
+                    lob_obj = lob.get("lobbyist") or {}
+                    name = " ".join(
+                        x for x in (lob_obj.get("first_name", ""), lob_obj.get("last_name", "")) if x
+                    )
+                    if name:
+                        lobbyists.append(name)
+                desc = la.get("description") or ""
+                if desc:
+                    issues.append(desc)
+                for ge in la.get("government_entities") or []:
+                    nm = ge.get("name") or ""
+                    if nm:
+                        entities.append(nm)
+            rows.append(
+                {
+                    "filing_uuid": r.get("filing_uuid", "") or "",
+                    "filing_type": r.get("filing_type", "") or "",
+                    "filing_year": str(r.get("filing_year", "") or year),
+                    "filing_period": r.get("filing_period", "") or "",
+                    "registrant_name": registrant_obj.get("name", "") or "",
+                    "registrant_id": str(registrant_obj.get("id", "") or ""),
+                    "client_name": client_obj.get("name", "") or "",
+                    "client_id": str(client_obj.get("id", "") or ""),
+                    "client_general_description": client_obj.get("general_description", "") or "",
+                    "income": str(r.get("income", "") or ""),
+                    "expenses": str(r.get("expenses", "") or ""),
+                    "lobbyists": "; ".join(sorted(set(lobbyists))),
+                    "issues": "; ".join(issues),
+                    "government_entities": "; ".join(sorted(set(entities))),
+                    "filing_date": (r.get("dt_posted") or "")[:10],
+                }
+            )
+        next_url = payload.get("next")
+        if not next_url:
+            break
+        url = next_url
+        page += 1
+        time.sleep(1.0 if not token else 0.3)
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument("--client", help="Client name filter")
+    p.add_argument("--registrant", help="Registrant (lobbying firm) name filter")
+    p.add_argument("--year", type=int, default=2024)
+    p.add_argument("--token", default=os.environ.get("SENATE_LDA_TOKEN"))
+    p.add_argument("--max-pages", type=int, default=25)
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    if not (a.client or a.registrant):
+        p.error("must supply at least one of --client / --registrant")
+    n = fetch(
+        client=a.client,
+        registrant=a.registrant,
+        year=a.year,
+        token=a.token,
+        out_path=a.out,
+        max_pages=a.max_pages,
+    )
+    print(f"Wrote {n} Senate LDA rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_usaspending.py b/optional-skills/research/osint-investigation/scripts/fetch_usaspending.py
new file mode 100644
index 00000000000..a59c5f17276
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_usaspending.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+"""Fetch federal contracts/awards from USAspending.gov API v2.
+
+No auth required. POST to /api/v2/search/spending_by_award/ with filters.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import sys
+import time
+import urllib.request
+from pathlib import Path
+
+ENDPOINT = "https://api.usaspending.gov/api/v2/search/spending_by_award/"
+COLUMNS = [
+    "award_id",
+    "recipient_name",
+    "recipient_uei",
+    "recipient_duns",
+    "recipient_parent_name",
+    "recipient_state",
+    "awarding_agency",
+    "awarding_sub_agency",
+    "award_type",
+    "award_amount",
+    "award_date",
+    "period_of_performance_start",
+    "period_of_performance_end",
+    "naics_code",
+    "psc_code",
+    "competition_extent",
+    "description",
+]
+
+# USAspending result column "code" → human label mapping for output.
+_FIELDS = [
+    "Award ID",
+    "Recipient Name",
+    "Recipient UEI",
+    "Recipient DUNS Number",
+    "Recipient Parent Name",
+    "Recipient State Code",
+    "Awarding Agency",
+    "Awarding Sub Agency",
+    "Award Type",
+    "Award Amount",
+    "Start Date",
+    "End Date",
+    "NAICS Code",
+    "PSC Code",
+    "Type of Set Aside",
+    "Description",
+]
+
+
+def _post(body: dict) -> dict:
+    req = urllib.request.Request(
+        ENDPOINT,
+        data=json.dumps(body).encode("utf-8"),
+        headers={"Content-Type": "application/json", "User-Agent": "hermes-agent osint-investigation"},
+        method="POST",
+    )
+    with urllib.request.urlopen(req, timeout=60) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def fetch(
+    recipient: str | None,
+    agency: str | None,
+    fy: int,
+    sole_source_only: bool,
+    out_path: str,
+    page_size: int = 100,
+    max_pages: int = 20,
+) -> int:
+    filters: dict = {
+        "time_period": [{"start_date": f"{fy - 1}-10-01", "end_date": f"{fy}-09-30"}],
+        # Contracts only by default; adjust award_type_codes for grants/loans.
+        "award_type_codes": ["A", "B", "C", "D"],
+    }
+    if recipient:
+        filters["recipient_search_text"] = [recipient]
+    if agency:
+        filters["agencies"] = [{"type": "awarding", "tier": "toptier", "name": agency}]
+
+    rows: list[dict[str, str]] = []
+    page = 1
+    while page <= max_pages:
+        body = {
+            "filters": filters,
+            "fields": _FIELDS,
+            "page": page,
+            "limit": page_size,
+            "sort": "Award Amount",
+            "order": "desc",
+        }
+        try:
+            payload = _post(body)
+        except Exception as e:  # noqa: BLE001
+            print(f"USAspending error on page {page}: {e}", file=sys.stderr)
+            break
+        results = payload.get("results", [])
+        if not results:
+            break
+        for r in results:
+            set_aside = r.get("Type of Set Aside", "") or ""
+            if sole_source_only and "sole" not in set_aside.lower():
+                continue
+            rows.append(
+                {
+                    "award_id": r.get("Award ID", "") or "",
+                    "recipient_name": r.get("Recipient Name", "") or "",
+                    "recipient_uei": r.get("Recipient UEI", "") or "",
+                    "recipient_duns": r.get("Recipient DUNS Number", "") or "",
+                    "recipient_parent_name": r.get("Recipient Parent Name", "") or "",
+                    "recipient_state": r.get("Recipient State Code", "") or "",
+                    "awarding_agency": r.get("Awarding Agency", "") or "",
+                    "awarding_sub_agency": r.get("Awarding Sub Agency", "") or "",
+                    "award_type": r.get("Award Type", "") or "",
+                    "award_amount": str(r.get("Award Amount", "") or ""),
+                    "award_date": r.get("Start Date", "") or "",
+                    "period_of_performance_start": r.get("Start Date", "") or "",
+                    "period_of_performance_end": r.get("End Date", "") or "",
+                    "naics_code": str(r.get("NAICS Code", "") or ""),
+                    "psc_code": str(r.get("PSC Code", "") or ""),
+                    "competition_extent": set_aside,
+                    "description": r.get("Description", "") or "",
+                }
+            )
+        meta = payload.get("page_metadata", {})
+        if not meta.get("hasNext"):
+            break
+        page += 1
+        time.sleep(0.5)
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument("--recipient", help="Recipient name search")
+    p.add_argument("--agency", help="Awarding agency (top-tier)")
+    p.add_argument("--fy", type=int, default=2024, help="Federal fiscal year")
+    p.add_argument("--sole-source-only", action="store_true")
+    p.add_argument("--max-pages", type=int, default=20)
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    if not (a.recipient or a.agency):
+        p.error("must supply at least one of --recipient / --agency")
+    n = fetch(
+        recipient=a.recipient,
+        agency=a.agency,
+        fy=a.fy,
+        sole_source_only=a.sole_source_only,
+        out_path=a.out,
+        max_pages=a.max_pages,
+    )
+    print(f"Wrote {n} USAspending rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_wayback.py b/optional-skills/research/osint-investigation/scripts/fetch_wayback.py
new file mode 100644
index 00000000000..fb9147f22c2
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_wayback.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""Search the Internet Archive Wayback Machine via the CDX server.
+
+The CDX API indexes ~900B+ archived web pages. Anonymous read access,
+no auth required. Useful for finding deleted / changed pages by URL,
+domain, or substring match.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import sys
+import urllib.parse
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get_json  # noqa: E402
+
+BASE = "https://web.archive.org/cdx/search/cdx"
+
+COLUMNS = [
+    "url",
+    "timestamp",
+    "wayback_url",
+    "mimetype",
+    "status",
+    "digest",
+    "length",
+]
+
+
+def fetch(
+    url_or_host: str,
+    match_type: str,
+    from_date: str | None,
+    to_date: str | None,
+    status: str | None,
+    mime: str | None,
+    collapse: str | None,
+    limit: int,
+    out_path: str,
+) -> int:
+    params: dict[str, str] = {
+        "url": url_or_host,
+        "matchType": match_type,
+        "output": "json",
+        "limit": str(limit),
+    }
+    if from_date:
+        params["from"] = from_date.replace("-", "")
+    if to_date:
+        params["to"] = to_date.replace("-", "")
+    if status:
+        params["filter"] = f"statuscode:{status}"
+    if mime:
+        params.setdefault("filter", "")
+        # Multiple filters: CDX accepts repeated filter params via urlencode list
+        params["filter"] = f"mimetype:{mime}"
+    if collapse:
+        params["collapse"] = collapse
+
+    url = f"{BASE}?{urllib.parse.urlencode(params)}"
+    try:
+        payload = get_json(url)
+    except Exception as e:  # noqa: BLE001
+        print(f"Wayback CDX error: {e}", file=sys.stderr)
+        payload = []
+
+    rows: list[dict[str, str]] = []
+    if isinstance(payload, list) and len(payload) > 1:
+        header = payload[0]
+        idx = {h: i for i, h in enumerate(header)}
+        for entry in payload[1:]:
+            ts = entry[idx["timestamp"]] if "timestamp" in idx else ""
+            orig = entry[idx["original"]] if "original" in idx else ""
+            rows.append(
+                {
+                    "url": orig,
+                    "timestamp": ts,
+                    "wayback_url": f"https://web.archive.org/web/{ts}/{orig}" if ts and orig else "",
+                    "mimetype": entry[idx["mimetype"]] if "mimetype" in idx else "",
+                    "status": entry[idx["statuscode"]] if "statuscode" in idx else "",
+                    "digest": entry[idx["digest"]] if "digest" in idx else "",
+                    "length": entry[idx["length"]] if "length" in idx else "",
+                }
+            )
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    if not rows:
+        print(
+            f"Wayback Machine: 0 captures for {url_or_host!r} matchType={match_type}.",
+            file=sys.stderr,
+        )
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--url", required=True, help="URL or host to look up in the archive")
+    p.add_argument(
+        "--match",
+        default="exact",
+        choices=["exact", "prefix", "host", "domain"],
+        help=(
+            "exact: this URL only. "
+            "prefix: this URL's path-prefix. "
+            "host: any URL on this host. "
+            "domain: any URL on this domain or subdomains."
+        ),
+    )
+    p.add_argument("--from-date", help="Earliest capture YYYY-MM-DD")
+    p.add_argument("--to-date", help="Latest capture YYYY-MM-DD")
+    p.add_argument("--status", help="HTTP status filter (e.g. 200)")
+    p.add_argument("--mime", help="MIME type filter (e.g. text/html)")
+    p.add_argument(
+        "--collapse",
+        help="Collapse adjacent identical entries (e.g. 'digest' for unique-content captures)",
+    )
+    p.add_argument("--limit", type=int, default=200)
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    n = fetch(
+        url_or_host=a.url,
+        match_type=a.match,
+        from_date=a.from_date,
+        to_date=a.to_date,
+        status=a.status,
+        mime=a.mime,
+        collapse=a.collapse,
+        limit=a.limit,
+        out_path=a.out,
+    )
+    print(f"Wrote {n} Wayback capture rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py b/optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py
new file mode 100644
index 00000000000..4ce5c93813c
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python3
+"""Search Wikipedia + Wikidata for an entity (person, company, place, concept).
+
+Two free APIs:
+  - Wikipedia OpenSearch + REST summary endpoint for narrative bio
+  - Wikidata SPARQL endpoint for structured facts (birth, employer, awards, etc.)
+
+Both are anonymous-access. Useful for resolving who-is-this-entity questions
+and surfacing cross-references that other sources can join against.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import re
+import sys
+import urllib.parse
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from _http import get_json  # noqa: E402
+
+WP_OPENSEARCH = "https://en.wikipedia.org/w/api.php"
+WP_SUMMARY = "https://en.wikipedia.org/api/rest_v1/page/summary/"
+WD_ACTION = "https://www.wikidata.org/w/api.php"
+
+COLUMNS = [
+    "source",
+    "label",
+    "description",
+    "qid",
+    "wikipedia_title",
+    "wikipedia_url",
+    "wikidata_url",
+    "instance_of",
+    "country",
+    "occupation",
+    "employer",
+    "date_of_birth",
+    "place_of_birth",
+    "summary",
+]
+
+
+def _wp_search(query: str, limit: int) -> list[dict]:
+    params = {
+        "action": "opensearch",
+        "search": query,
+        "limit": str(min(limit, 20)),
+        "format": "json",
+    }
+    url = f"{WP_OPENSEARCH}?{urllib.parse.urlencode(params)}"
+    data = get_json(url)
+    if not isinstance(data, list) or len(data) < 4:
+        return []
+    titles, descs, urls = data[1], data[2], data[3]
+    out = []
+    for i, title in enumerate(titles):
+        out.append(
+            {
+                "title": title,
+                "description": descs[i] if i < len(descs) else "",
+                "url": urls[i] if i < len(urls) else "",
+            }
+        )
+    return out
+
+
+def _wp_summary(title: str) -> dict:
+    """Pull the REST summary for a title — short bio, image, type."""
+    url = f"{WP_SUMMARY}{urllib.parse.quote(title.replace(' ', '_'))}"
+    try:
+        return get_json(url)  # type: ignore[return-value]
+    except Exception as e:  # noqa: BLE001
+        print(f"Wikipedia summary lookup for {title!r} failed: {e}", file=sys.stderr)
+        return {}
+
+
+def _wd_lookup_by_qid(qid: str) -> dict:
+    """Pull common facts for a QID via Wikidata's Action API (no SPARQL).
+
+    The Action API is far more lenient on rate-limits than the SPARQL Query
+    Service. We get claims as QIDs and then resolve labels in one batch call.
+    """
+    # Properties of interest. The Action API returns claims as QIDs or
+    # typed literals, so the slot mapping is local-only.
+    interesting = {
+        "P31": "instance_of",
+        "P17": "country",          # for orgs / places
+        "P27": "country",          # for individuals (country of citizenship)
+        "P106": "occupation",
+        "P108": "employer",
+        "P569": "date_of_birth",
+        "P19": "place_of_birth",
+    }
+    params = {
+        "action": "wbgetentities",
+        "ids": qid,
+        "props": "claims",
+        "format": "json",
+    }
+    url = f"{WD_ACTION}?{urllib.parse.urlencode(params)}"
+    try:
+        data = get_json(url)
+    except Exception as e:  # noqa: BLE001
+        print(f"Wikidata wbgetentities for {qid} failed: {e}", file=sys.stderr)
+        return {}
+    if not isinstance(data, dict):
+        return {}
+    claims = (data.get("entities", {}).get(qid, {}) or {}).get("claims", {}) or {}
+
+    # Collect raw values (QIDs or literals) and remember which slot each
+    # came from. Date literals come back as ISO strings; QIDs need a label
+    # resolution pass.
+    qid_to_slots: dict[str, list[str]] = {}
+    facts: dict[str, list[str]] = {}
+    for prop_id, slot in interesting.items():
+        for claim in claims.get(prop_id, []) or []:
+            v = (claim.get("mainsnak", {}) or {}).get("datavalue", {}) or {}
+            vtype = v.get("type")
+            value = v.get("value")
+            if vtype == "wikibase-entityid" and isinstance(value, dict):
+                vqid = value.get("id", "")
+                if vqid:
+                    qid_to_slots.setdefault(vqid, [])
+                    if slot not in qid_to_slots[vqid]:
+                        qid_to_slots[vqid].append(slot)
+            elif vtype == "time" and isinstance(value, dict):
+                raw = value.get("time", "") or ""
+                # +1955-10-28T00:00:00Z → 1955-10-28
+                m = re.search(r"[+-]?(\d{4})-(\d{2})-(\d{2})", raw)
+                if m:
+                    facts.setdefault(slot, []).append(
+                        f"{m.group(1)}-{m.group(2)}-{m.group(3)}"
+                    )
+            elif vtype == "string":
+                facts.setdefault(slot, []).append(str(value))
+
+    # Resolve labels for all referenced QIDs in one batch (up to 50 at a time).
+    qids = list(qid_to_slots)
+    for i in range(0, len(qids), 50):
+        batch = qids[i : i + 50]
+        params = {
+            "action": "wbgetentities",
+            "ids": "|".join(batch),
+            "props": "labels",
+            "languages": "en",
+            "format": "json",
+        }
+        url = f"{WD_ACTION}?{urllib.parse.urlencode(params)}"
+        try:
+            data = get_json(url)
+        except Exception as e:  # noqa: BLE001
+            print(f"Wikidata label batch failed: {e}", file=sys.stderr)
+            continue
+        if not isinstance(data, dict):
+            continue
+        ents = data.get("entities", {}) or {}
+        for vqid, ent in ents.items():
+            label = (ent.get("labels", {}).get("en", {}) or {}).get("value", "") or vqid
+            for slot in qid_to_slots.get(vqid, []):
+                facts.setdefault(slot, []).append(label)
+
+    # Deduplicate per slot, preserving order.
+    deduped: dict[str, list[str]] = {}
+    for slot, vals in facts.items():
+        seen = set()
+        out = []
+        for v in vals:
+            if v in seen:
+                continue
+            seen.add(v)
+            out.append(v)
+        deduped[slot] = out
+    return deduped
+
+
+def _wd_qid_for_title(title: str) -> str:
+    """Get the Wikidata QID associated with a Wikipedia article title."""
+    params = {
+        "action": "query",
+        "format": "json",
+        "prop": "pageprops",
+        "ppprop": "wikibase_item",
+        "titles": title,
+        "redirects": 1,
+    }
+    url = f"{WP_OPENSEARCH}?{urllib.parse.urlencode(params)}"
+    try:
+        data = get_json(url)
+    except Exception:  # noqa: BLE001
+        return ""
+    if not isinstance(data, dict):
+        return ""
+    pages = data.get("query", {}).get("pages", {}) or {}
+    for page in pages.values():
+        qid = (page.get("pageprops") or {}).get("wikibase_item", "")
+        if qid:
+            return qid
+    return ""
+
+
+def fetch(query: str, limit: int, no_wikidata: bool, out_path: str) -> int:
+    hits = _wp_search(query, limit)
+    rows: list[dict[str, str]] = []
+    for hit in hits[:limit]:
+        title = hit.get("title", "")
+        if not title:
+            continue
+        summary = _wp_summary(title)
+        qid = _wd_qid_for_title(title) if not no_wikidata else ""
+        facts: dict = {}
+        if qid:
+            facts = _wd_lookup_by_qid(qid)
+        rows.append(
+            {
+                "source": "wikipedia+wikidata" if qid else "wikipedia",
+                "label": title,
+                "description": (summary.get("description") or hit.get("description") or "").strip(),
+                "qid": qid,
+                "wikipedia_title": title,
+                "wikipedia_url": hit.get("url", ""),
+                "wikidata_url": f"https://www.wikidata.org/wiki/{qid}" if qid else "",
+                "instance_of": "; ".join(facts.get("instance_of", [])),
+                "country": "; ".join(facts.get("country", [])),
+                "occupation": "; ".join(facts.get("occupation", [])),
+                "employer": "; ".join(facts.get("employer", [])),
+                "date_of_birth": "; ".join(facts.get("date_of_birth", []))[:10] if facts.get("date_of_birth") else "",
+                "place_of_birth": "; ".join(facts.get("place_of_birth", [])),
+                "summary": (summary.get("extract") or "").replace("\n", " ")[:1000],
+            }
+        )
+
+    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w", newline="", encoding="utf-8") as fh:
+        w = csv.DictWriter(fh, fieldnames=COLUMNS)
+        w.writeheader()
+        w.writerows(rows)
+    if not rows:
+        print(
+            f"Wikipedia: 0 articles for query={query!r}. "
+            "Private individuals not notable enough for a Wikipedia article "
+            "won't appear here (the bar is real).",
+            file=sys.stderr,
+        )
+    return len(rows)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--query", required=True, help="Entity name (person, company, place, concept)")
+    p.add_argument("--limit", type=int, default=5)
+    p.add_argument(
+        "--no-wikidata",
+        action="store_true",
+        help="Skip the Wikidata SPARQL enrichment (faster, less detail)",
+    )
+    p.add_argument("--out", required=True)
+    a = p.parse_args()
+    n = fetch(query=a.query, limit=a.limit, no_wikidata=a.no_wikidata, out_path=a.out)
+    print(f"Wrote {n} Wikipedia/Wikidata rows to {a.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/scripts/timing_analysis.py b/optional-skills/research/osint-investigation/scripts/timing_analysis.py
new file mode 100644
index 00000000000..4e0ece227b4
--- /dev/null
+++ b/optional-skills/research/osint-investigation/scripts/timing_analysis.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+"""Permutation test for donation/contract timing correlation (stdlib-only).
+
+For each (donor, vendor) pair, compute the mean number of days between each
+donation and the nearest contract award. Then shuffle contract award dates
+N times within the observation window and compute the same statistic. The
+one-tailed p-value is the fraction of permutations whose mean is <= the
+observed mean (smaller distance = tighter clustering).
+
+Adapted from ShinMegamiBoson/OpenPlanter (MIT). Differences:
+  - Pure stdlib (no pandas / numpy)
+  - Domain-agnostic (no snow-vendor / CRITICAL-politician filter)
+  - Configurable column names via flags
+  - Optional --seed for reproducibility
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import datetime as dt
+import json
+import math
+import random
+import statistics
+from collections import defaultdict
+from pathlib import Path
+
+_DATE_FORMATS = ("%Y-%m-%d", "%m/%d/%Y", "%Y/%m/%d", "%m-%d-%Y", "%Y%m%d")
+
+
+def parse_date(raw: str) -> dt.date | None:
+    if not raw:
+        return None
+    raw = raw.strip()
+    for fmt in _DATE_FORMATS:
+        try:
+            return dt.datetime.strptime(raw, fmt).date()
+        except ValueError:
+            continue
+    return None
+
+
+def _read(path: str) -> list[dict[str, str]]:
+    with open(path, newline="", encoding="utf-8") as fh:
+        return list(csv.DictReader(fh))
+
+
+def _nearest_distance(donation_date: dt.date, awards: list[dt.date]) -> int:
+    """Absolute days to nearest award date."""
+    return min(abs((donation_date - a).days) for a in awards)
+
+
+def _permute(
+    awards_count: int,
+    donations: list[dt.date],
+    date_min: dt.date,
+    date_max: dt.date,
+    rng: random.Random,
+) -> float:
+    """One permutation: draw uniform random award dates, compute mean nearest-distance."""
+    span_days = (date_max - date_min).days or 1
+    rand_awards = [
+        date_min + dt.timedelta(days=rng.randint(0, span_days))
+        for _ in range(awards_count)
+    ]
+    distances = [_nearest_distance(d, rand_awards) for d in donations]
+    return statistics.mean(distances)
+
+
+def analyze(
+    donations_path: str,
+    donation_date_col: str,
+    donation_amount_col: str,
+    donation_donor_col: str,
+    donation_recipient_col: str,
+    contracts_path: str,
+    contract_date_col: str,
+    contract_vendor_col: str,
+    cross_links_path: str | None,
+    n_permutations: int = 1000,
+    min_donations: int = 3,
+    p_threshold: float = 0.05,
+    seed: int | None = None,
+    out_path: str = "timing.json",
+) -> dict:
+    rng = random.Random(seed)
+
+    donations = _read(donations_path)
+    contracts = _read(contracts_path)
+
+    # Allow optional join through cross_links — donor (left) ↔ vendor (right).
+    # When present, donor strings get mapped to matched vendor names so the
+    # vendor-date index lookup actually finds the contracts.
+    matched_pairs: set[tuple[str, str]] | None = None
+    donor_to_vendors: dict[str, set[str]] = defaultdict(set)
+    if cross_links_path:
+        matched_pairs = set()
+        for row in _read(cross_links_path):
+            left = row.get("left_name", "")
+            right = row.get("right_name", "")
+            matched_pairs.add((left, right))
+            donor_to_vendors[left].add(right)
+
+    # Index contract dates by vendor name.
+    vendor_to_award_dates: dict[str, list[dt.date]] = defaultdict(list)
+    all_award_dates: list[dt.date] = []
+    for row in contracts:
+        d = parse_date(row.get(contract_date_col, ""))
+        if not d:
+            continue
+        vendor_to_award_dates[row.get(contract_vendor_col, "").strip()].append(d)
+        all_award_dates.append(d)
+
+    if not all_award_dates:
+        raise SystemExit(f"No parseable dates in {contracts_path}/{contract_date_col}")
+    global_min = min(all_award_dates)
+    global_max = max(all_award_dates)
+
+    # Group donations by (donor, recipient).
+    grouped: dict[tuple[str, str], list[tuple[dt.date, float]]] = defaultdict(list)
+    for row in donations:
+        donor = row.get(donation_donor_col, "").strip()
+        recip = row.get(donation_recipient_col, "").strip()
+        d = parse_date(row.get(donation_date_col, ""))
+        try:
+            amt = float(row.get(donation_amount_col, "0") or 0)
+        except ValueError:
+            amt = 0.0
+        if not (donor and recip and d):
+            continue
+        grouped[(donor, recip)].append((d, amt))
+
+    results = []
+    skipped = 0
+    for (donor, recip), records in grouped.items():
+        if len(records) < min_donations:
+            skipped += 1
+            continue
+        # Only test if donor appears in cross-links (when provided). The
+        # (donor, candidate) tuple itself is NOT what's in matched_pairs —
+        # cross_links pairs are (donor, vendor). We use the cross-link to
+        # map donor → vendor name(s) so the vendor-date index resolves.
+        if matched_pairs is not None and donor not in donor_to_vendors:
+            skipped += 1
+            continue
+        # Try direct donor→awards first, then go through cross-link vendor names.
+        award_dates = list(vendor_to_award_dates.get(donor, []))
+        if not award_dates:
+            award_dates = list(vendor_to_award_dates.get(recip, []))
+        if not award_dates and donor_to_vendors.get(donor):
+            for vendor_name in donor_to_vendors[donor]:
+                award_dates.extend(vendor_to_award_dates.get(vendor_name, []))
+        if not award_dates:
+            skipped += 1
+            continue
+
+        donation_dates = [d for (d, _) in records]
+        observed = statistics.mean(
+            _nearest_distance(d, award_dates) for d in donation_dates
+        )
+
+        permuted_means = [
+            _permute(len(award_dates), donation_dates, global_min, global_max, rng)
+            for _ in range(n_permutations)
+        ]
+        p_value = sum(1 for m in permuted_means if m <= observed) / n_permutations
+        null_mean = statistics.mean(permuted_means)
+        null_std = statistics.pstdev(permuted_means) or 1.0
+        effect_size = (null_mean - observed) / null_std
+
+        results.append(
+            {
+                "donor": donor,
+                "recipient": recip,
+                "n_donations": len(records),
+                "n_award_dates": len(award_dates),
+                "observed_mean_days": round(observed, 2),
+                "null_mean_days": round(null_mean, 2),
+                "p_value": round(p_value, 4),
+                "effect_size_sd": round(effect_size, 2),
+                "significant": p_value < p_threshold,
+                "total_donation_amount": round(sum(a for (_, a) in records), 2),
+            }
+        )
+
+    results.sort(key=lambda r: r["p_value"])
+
+    payload = {
+        "metadata": {
+            "n_permutations": n_permutations,
+            "min_donations": min_donations,
+            "p_threshold": p_threshold,
+            "seed": seed,
+            "n_pairs_tested": len(results),
+            "n_pairs_skipped": skipped,
+            "n_significant": sum(1 for r in results if r["significant"]),
+            "observation_window": [global_min.isoformat(), global_max.isoformat()],
+        },
+        "results": results,
+    }
+
+    Path(out_path).write_text(json.dumps(payload, indent=2))
+    return payload
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    p.add_argument("--donations", required=True)
+    p.add_argument("--donation-date-col", required=True)
+    p.add_argument("--donation-amount-col", required=True)
+    p.add_argument("--donation-donor-col", required=True)
+    p.add_argument("--donation-recipient-col", required=True)
+    p.add_argument("--contracts", required=True)
+    p.add_argument("--contract-date-col", required=True)
+    p.add_argument("--contract-vendor-col", required=True)
+    p.add_argument(
+        "--cross-links",
+        help="Optional cross_links.csv to restrict (donor, vendor) pairs",
+    )
+    p.add_argument("--permutations", type=int, default=1000)
+    p.add_argument("--min-donations", type=int, default=3)
+    p.add_argument("--p-threshold", type=float, default=0.05)
+    p.add_argument("--seed", type=int)
+    p.add_argument("--out", default="timing.json")
+    a = p.parse_args()
+
+    payload = analyze(
+        donations_path=a.donations,
+        donation_date_col=a.donation_date_col,
+        donation_amount_col=a.donation_amount_col,
+        donation_donor_col=a.donation_donor_col,
+        donation_recipient_col=a.donation_recipient_col,
+        contracts_path=a.contracts,
+        contract_date_col=a.contract_date_col,
+        contract_vendor_col=a.contract_vendor_col,
+        cross_links_path=a.cross_links,
+        n_permutations=a.permutations,
+        min_donations=a.min_donations,
+        p_threshold=a.p_threshold,
+        seed=a.seed,
+        out_path=a.out,
+    )
+    meta = payload["metadata"]
+    print(
+        f"Tested {meta['n_pairs_tested']} pairs ({meta['n_pairs_skipped']} skipped). "
+        f"Significant (p<{meta['p_threshold']}): {meta['n_significant']}. "
+        f"Wrote {a.out}"
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/optional-skills/research/osint-investigation/templates/source-template.md b/optional-skills/research/osint-investigation/templates/source-template.md
new file mode 100644
index 00000000000..b023cc26888
--- /dev/null
+++ b/optional-skills/research/osint-investigation/templates/source-template.md
@@ -0,0 +1,59 @@
+# <Source Name>
+
+## 1. Summary
+
+What this data source is, who publishes it, why it matters for investigations.
+
+## 2. Access Methods
+
+- API endpoint(s)
+- Bulk download URLs
+- Auth requirements (none / API key / OAuth)
+- Rate limits
+
+## 3. Data Schema
+
+Key fields, record types, table relationships. List the columns the fetch
+script emits.
+
+## 4. Coverage
+
+- Jurisdiction
+- Time range
+- Update frequency
+- Data volume (rows / GB)
+
+## 5. Cross-Reference Potential
+
+Which other sources can be joined and on what keys. Be explicit:
+
+- `<source>` ↔ `<column>` (join key: <normalized entity name / EIN / CIK / etc.>)
+
+## 6. Data Quality
+
+Known issues — formatting inconsistencies, missing fields, duplicates,
+historical gaps, redaction.
+
+## 7. Acquisition Script
+
+Path: `scripts/fetch_<source>.py`
+
+Example:
+
+```bash
+python3 SKILL_DIR/scripts/fetch_<source>.py --<filter> <value> --out data/<source>.csv
+```
+
+Output CSV columns: `<col1>, <col2>, ...`
+
+## 8. Legal & Licensing
+
+- Public records law / FOIA basis
+- Terms of use / acceptable use
+- Attribution requirements (if any)
+
+## 9. References
+
+- Official docs: <url>
+- Data dictionary: <url>
+- Related coverage / journalism: <url>
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index d1544ce89b9..ce1861431a6 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -167,6 +167,7 @@ hermes skills uninstall <skill-name>
 | [**drug-discovery**](/docs/user-guide/skills/optional/research/research-drug-discovery) | Pharmaceutical research assistant for drug discovery workflows. Search bioactive compounds on ChEMBL, calculate drug-likeness (Lipinski Ro5, QED, TPSA, synthetic accessibility), look up drug-drug interactions via OpenFDA, interpret ADMET... |
 | [**duckduckgo-search**](/docs/user-guide/skills/optional/research/research-duckduckgo-search) | Free web search via DuckDuckGo — text, news, images, videos. No API key needed. Prefer the `ddgs` CLI when installed; use the Python DDGS library only after verifying that `ddgs` is available in the current runtime. |
 | [**gitnexus-explorer**](/docs/user-guide/skills/optional/research/research-gitnexus-explorer) | Index a codebase with GitNexus and serve an interactive knowledge graph via web UI + Cloudflare tunnel. |
+| [**osint-investigation**](/docs/user-guide/skills/optional/research/research-osint-investigation) | Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property records (ACRIS), OpenCorporates registries, CourtListener court records, Wayback... |
 | [**parallel-cli**](/docs/user-guide/skills/optional/research/research-parallel-cli) | Optional vendor skill for Parallel CLI — agent-native web search, extraction, deep research, enrichment, FindAll, and monitoring. Prefer JSON output and non-interactive flows. |
 | [**qmd**](/docs/user-guide/skills/optional/research/research-qmd) | Search personal knowledge bases, notes, docs, and meeting transcripts locally using qmd — a hybrid retrieval engine with BM25, vector search, and LLM reranking. Supports CLI and MCP integration. |
 | [**scrapling**](/docs/user-guide/skills/optional/research/research-scrapling) | Web scraping with Scrapling - HTTP fetching, stealth browser automation, Cloudflare bypass, and spider crawling via CLI and Python. |
diff --git a/website/docs/user-guide/skills/optional/research/research-osint-investigation.md b/website/docs/user-guide/skills/optional/research/research-osint-investigation.md
new file mode 100644
index 00000000000..7428c3022b2
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/research/research-osint-investigation.md
@@ -0,0 +1,294 @@
+---
+title: "Osint Investigation"
+sidebar_label: "Osint Investigation"
+description: "Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property r..."
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Osint Investigation
+
+Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property records (ACRIS), OpenCorporates registries, CourtListener court records, Wayback Machine archives, Wikipedia + Wikidata, GDELT news monitoring. Entity resolution across sources, cross-link analysis, timing correlation, evidence chains. Python stdlib only.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/research/osint-investigation` |
+| Path | `optional-skills/research/osint-investigation` |
+| Version | `0.1.0` |
+| Author | Hermes Agent (adapted from ShinMegamiBoson/OpenPlanter, MIT) |
+| Platforms | linux, macos, windows |
+| Tags | `osint`, `investigation`, `public-records`, `sec`, `sanctions`, `corporate-registry`, `property`, `courts`, `due-diligence`, `journalism` |
+| Related skills | [`domain-intel`](/docs/user-guide/skills/optional/research/research-domain-intel), [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# OSINT Investigation — Public Records Cross-Reference
+
+Investigative framework for public-records OSINT: government contracts,
+corporate filings, lobbying, sanctions, offshore leaks, property records,
+court records, web archives, knowledge bases, and global news. Resolve
+entities across heterogeneous sources, build cross-links with explicit
+confidence, run statistical timing tests, and produce structured evidence
+chains.
+
+**Python stdlib only.** Zero install. Works on Linux, macOS, Windows. Most
+sources work with no API key (OpenCorporates has an optional free token
+that raises rate limits).
+
+Adapted from the MIT-licensed ShinMegamiBoson/OpenPlanter project; expanded
+to cover identity / property / litigation / archives / news sources that
+the original didn't address.
+
+## When to use this skill
+
+Use when the user asks for:
+
+- "follow the money" — government contracts, lobbying → legislation, sanctions
+- corporate due diligence — who controls company X, where are they
+  incorporated, who serves on their boards, what filings have they made
+- sanctions screening — is entity X on OFAC SDN, ICIJ offshore leaks
+- pay-to-play investigation — contractors with offshore ties, lobbying
+  clients winning awards
+- property ownership — find recorded deeds/mortgages by name or address
+  (NYC; for other counties point users at the relevant recorder)
+- litigation history — find federal + state court opinions and PACER dockets
+- multi-source entity resolution where naming varies (LLC suffixes, abbreviations)
+- evidence-chain construction with explicit confidence levels
+- "what's been said about X" — international news (GDELT) + Wikipedia
+  narrative + Wayback Machine to recover dead URLs
+
+Do NOT use this skill for:
+
+- general web research → `web_search` / `web_extract`
+- domain/infrastructure OSINT → `domain-intel` skill
+- academic literature → `arxiv` skill
+- social-media profile discovery → `sherlock` skill (optional)
+- US **federal** campaign finance — FEC is intentionally NOT covered here
+  (the API is unreliable for ad-hoc contributor-name queries on the free
+  DEMO_KEY tier). For federal donations, point users at
+  https://www.fec.gov/data/ directly.
+
+## Workflow
+
+The agent runs scripts via the `terminal` tool. `SKILL_DIR` is the directory
+holding this SKILL.md.
+
+### 1. Identify which sources apply
+
+Read the data-source wiki entries to plan the investigation:
+
+```
+ls SKILL_DIR/references/sources/
+
+# Federal financial / regulatory
+cat SKILL_DIR/references/sources/sec-edgar.md       # corporate filings
+cat SKILL_DIR/references/sources/usaspending.md     # federal contracts
+cat SKILL_DIR/references/sources/senate-ld.md       # lobbying
+cat SKILL_DIR/references/sources/ofac-sdn.md        # sanctions
+cat SKILL_DIR/references/sources/icij-offshore.md   # offshore leaks
+
+# Identity / property / litigation / archives / news
+cat SKILL_DIR/references/sources/nyc-acris.md       # NYC property records
+cat SKILL_DIR/references/sources/opencorporates.md  # global corporate registry
+cat SKILL_DIR/references/sources/courtlistener.md   # court records (federal + state)
+cat SKILL_DIR/references/sources/wayback.md         # Wayback Machine archives
+cat SKILL_DIR/references/sources/wikipedia.md       # Wikipedia + Wikidata
+cat SKILL_DIR/references/sources/gdelt.md           # global news monitoring
+```
+
+Each entry follows a 9-section template: summary, access, schema, coverage,
+cross-reference keys, data quality, acquisition, legal, references.
+
+The **cross-reference potential** section maps join keys between sources — read
+those first to pick the right pair.
+
+### 2. Acquire data
+
+Each source has a stdlib-only fetch script in `SKILL_DIR/scripts/`:
+
+**Federal financial / regulatory**
+
+```bash
+# SEC EDGAR filings (corporate disclosures)
+python3 SKILL_DIR/scripts/fetch_sec_edgar.py --cik 0000320193 \
+    --types 10-K,10-Q --out data/edgar_filings.csv
+
+# USAspending federal contracts
+python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
+    --fy 2024 --out data/contracts.csv
+
+# Senate LD-1 / LD-2 lobbying disclosures
+python3 SKILL_DIR/scripts/fetch_senate_ld.py --client "EXAMPLE CORP" \
+    --year 2024 --out data/lobbying.csv
+
+# OFAC SDN sanctions list (full snapshot)
+python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --out data/ofac_sdn.csv
+
+# ICIJ Offshore Leaks — downloads ~70 MB bulk CSV on first use,
+# then searches it locally. Cached for 30 days under
+# $HERMES_OSINT_CACHE/icij/ (default: ~/.cache/hermes-osint/icij/).
+python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
+    --out data/icij.csv
+```
+
+**Identity / property / litigation / archives / news**
+
+```bash
+# NYC property records (deeds, mortgages, liens) — ACRIS via Socrata
+python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "SMITH, JOHN" \
+    --out data/acris.csv
+python3 SKILL_DIR/scripts/fetch_nyc_acris.py --address "571 HUDSON" \
+    --out data/acris_addr.csv
+
+# OpenCorporates — 130+ jurisdiction corporate registry
+# (free token required; set OPENCORPORATES_API_TOKEN or pass --token)
+python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
+    --jurisdiction us_ny --out data/opencorporates.csv
+
+# CourtListener — federal + state court opinions, PACER dockets
+python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Smith v. Example Corp" \
+    --type opinions --out data/courts.csv
+
+# Wayback Machine — historical web captures
+python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
+    --match host --collapse digest --out data/wayback.csv
+
+# Wikipedia + Wikidata — narrative bio + structured facts
+# Set HERMES_OSINT_UA=your-app/1.0 (your@email) to identify yourself
+python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Bill Gates" \
+    --out data/wp.csv
+
+# GDELT — global news in 100+ languages, ~2015→present
+python3 SKILL_DIR/scripts/fetch_gdelt.py --query '"Example Corp"' \
+    --timespan 1y --out data/gdelt.csv
+```
+
+All outputs are normalized CSV with a header row. Re-run scripts idempotently.
+
+When a private individual won't be in a source (e.g. SEC EDGAR for a non-public-
+company person, USAspending for someone who isn't a federal contractor, Senate
+LDA for someone who isn't a lobbying client), the script returns 0 rows with a
+clear warning rather than silently writing an empty CSV. EDGAR specifically
+flags when the company-name resolver matched an individual Form 3/4/5 filer
+rather than a corporate registrant.
+
+Rate-limit notes are in each source's wiki entry. Default fetchers sleep
+politely between paginated requests. **API keys raise rate limits** for
+sources that support them (`SEC_USER_AGENT`, `SENATE_LDA_TOKEN`,
+`OPENCORPORATES_API_TOKEN`, `COURTLISTENER_TOKEN`). All scripts surface
+429 responses immediately with the upstream's quota message so the user
+knows to slow down or supply a key.
+
+### 3. Resolve entities across sources
+
+Normalize names and find matches between two CSV files:
+
+```bash
+# Match lobbying clients (Senate LDA) against contract recipients (USAspending)
+python3 SKILL_DIR/scripts/entity_resolution.py \
+    --left  data/lobbying.csv   --left-name-col  client_name \
+    --right data/contracts.csv  --right-name-col recipient_name \
+    --out data/cross_links.csv
+```
+
+Three matching tiers with explicit confidence:
+
+| Tier | Method | Confidence |
+|------|--------|------------|
+| `exact` | Normalized strings equal after suffix/punctuation strip | high |
+| `fuzzy` | Sorted-token equality (word-bag match) | medium |
+| `token_overlap` | ≥60% token overlap, ≥2 shared tokens, tokens ≥4 chars | low |
+
+Output `cross_links.csv` columns: `match_type, confidence, left_name,
+right_name, left_normalized, right_normalized, left_row, right_row`.
+
+### 4. Statistical timing correlation (optional)
+
+Test whether two time series cluster suspiciously close together — e.g.
+lobbying filings near contract awards — using a permutation test:
+
+```bash
+python3 SKILL_DIR/scripts/timing_analysis.py \
+    --donations data/lobbying.csv --donation-date-col filing_date \
+        --donation-amount-col income --donation-donor-col client_name \
+        --donation-recipient-col registrant_name \
+    --contracts data/contracts.csv --contract-date-col award_date \
+        --contract-vendor-col recipient_name \
+    --cross-links data/cross_links.csv \
+    --permutations 1000 \
+    --out data/timing.json
+```
+
+The script's column flags are intentionally generic — the original tool was
+written for donations vs awards, but it works for any (event, payee) time
+series joined through cross-links. Null hypothesis: event timing is
+independent of award dates. One-tailed p-value = fraction of permutations
+with mean nearest-award distance ≤ observed. Minimum 3 events per (payer,
+vendor) pair to run the test.
+
+### 5. Build the findings JSON (evidence chain)
+
+```bash
+python3 SKILL_DIR/scripts/build_findings.py \
+    --cross-links data/cross_links.csv \
+    --timing data/timing.json \
+    --out data/findings.json
+```
+
+Every finding has `id, title, severity, confidence, summary, evidence[], sources[]`.
+Each evidence item points back to a specific row in a source CSV. The user (or a
+follow-up agent) can verify every claim against its source.
+
+## Confidence and evidence discipline
+
+This is the load-bearing rule of the skill. Tell the user:
+
+- Every claim must trace to a record. No naked assertions.
+- Confidence tier travels with the claim. `match_type=fuzzy` is "probable",
+  not "confirmed."
+- Entity resolution produces candidates, NOT conclusions. A `fuzzy` match
+  between "ACME LLC" and "Acme Holdings Group" is a lead, not a fact.
+- Statistical significance ≠ wrongdoing. p &lt; 0.05 means the timing pattern
+  is unlikely under the null. It does not establish corruption.
+- All data sources here are public records. They may still contain
+  inaccuracies, stale info, or redactions (GDPR, sealed records).
+
+## Adding a new data source
+
+Use the template:
+
+```bash
+cp SKILL_DIR/templates/source-template.md \
+    SKILL_DIR/references/sources/<your-source>.md
+```
+
+Fill in all 9 sections. Write a `fetch_<source>.py` script in `scripts/` that
+uses stdlib only and writes a normalized CSV. Update the source list in the
+"When to use" section above.
+
+## Tools and their limits
+
+- `entity_resolution.py` does NOT use external fuzzy libraries (no rapidfuzz,
+  no jellyfish). Token-bag matching is the upper bound here. If you need
+  Levenshtein, transliteration, or phonetic matching, pip-install separately.
+- `timing_analysis.py` uses Python's `random` for permutations. For
+  reproducibility, pass `--seed N`.
+- `fetch_*.py` scripts use `urllib.request` and respect `Retry-After`. Heavy
+  bulk usage may still violate ToS — read each source's legal section first.
+
+## Legal note
+
+All Phase-1 sources are public records. Bulk acquisition is permitted under
+their respective access terms (FOIA, public records law, ICIJ explicit
+publication, OFAC public data). However:
+
+- Some sources rate-limit aggressively. Respect their headers.
+- Some redact registrant info (GDPR on WHOIS, sealed filings).
+- Cross-referencing public records to identify private individuals can have
+  ethical implications. The skill produces evidence chains, not accusations.
diff --git a/website/sidebars.ts b/website/sidebars.ts
index f619f2318c9..1a0aa6fb0bb 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -554,6 +554,7 @@ const sidebars: SidebarsConfig = {
                     'user-guide/skills/optional/research/research-drug-discovery',
                     'user-guide/skills/optional/research/research-duckduckgo-search',
                     'user-guide/skills/optional/research/research-gitnexus-explorer',
+                    'user-guide/skills/optional/research/research-osint-investigation',
                     'user-guide/skills/optional/research/research-parallel-cli',
                     'user-guide/skills/optional/research/research-qmd',
                     'user-guide/skills/optional/research/research-scrapling',

From d0a183cadd877fe21a92fdc9114509729444594e Mon Sep 17 00:00:00 2001
From: worlldz <101180447+worlldz@users.noreply.github.com>
Date: Sat, 16 May 2026 14:46:34 +0530
Subject: [PATCH 122/218] fix(doctor): suppress stale direct-key issues when
 oauth is healthy

Fixes #26693

`hermes doctor` currently promotes invalid direct API keys into the final
summary even when the matching OAuth path is already healthy. That makes
the setup look more broken than it really is.

This change keeps the failed API Connectivity row visible but stops
treating it as a blocking summary issue when a healthy OAuth fallback
already exists for the same provider family.

Covered cases:
- Gemini OAuth + invalid direct Gemini key
- MiniMax OAuth + invalid direct MiniMax key

Based on #26704 by @worlldz.
---
 hermes_cli/doctor.py            |  29 ++++++++-
 tests/hermes_cli/test_doctor.py | 105 ++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index bf5a8865909..9d3b6e3c01a 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -152,6 +152,30 @@ def _apply_doctor_tool_availability_overrides(available: list[str], unavailable:
     return updated_available, updated_unavailable
 
 
+def _has_healthy_oauth_fallback_for_apikey_provider(provider_label: str) -> bool:
+    """Return True when a direct API-key probe failure is non-blocking.
+
+    Some provider families support both a direct API-key path and a separate
+    OAuth runtime path. When the OAuth path is already healthy, doctor should
+    still show a failed API-key connectivity row, but it should not promote
+    that direct-key problem into the final blocking summary.
+    """
+    try:
+        from hermes_cli.auth import (
+            get_gemini_oauth_auth_status,
+            get_minimax_oauth_auth_status,
+        )
+    except Exception:
+        return False
+
+    normalized = (provider_label or "").strip().lower()
+    if normalized in {"google / gemini", "gemini"}:
+        return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
+    if normalized == "minimax":
+        return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
+    return False
+
+
 def check_ok(text: str, detail: str = ""):
     print(f"  {color('✓', Colors.GREEN)} {text}" + (f" {color(detail, Colors.DIM)}" if detail else ""))
 
@@ -1594,7 +1618,10 @@ def run_doctor(args):
                 print(f"  {_glyph} {_label} {_detail}")
             else:
                 print(f"  {_glyph} {_label}")
-        for _issue in _r.issues:
+        _issues_to_add = list(_r.issues)
+        if _issues_to_add and _has_healthy_oauth_fallback_for_apikey_provider(_r.label):
+            _issues_to_add = []
+        for _issue in _issues_to_add:
             issues.append(_issue)
 
     # =========================================================================
diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py
index 34e75045eff..ee419656a71 100644
--- a/tests/hermes_cli/test_doctor.py
+++ b/tests/hermes_cli/test_doctor.py
@@ -839,3 +839,108 @@ class TestGitHubTokenCheck:
 
         assert "gh auth" in str(call_log) or any(c[0] == "gh" for c in call_log), f"gh not called: {call_log}"
         assert "GitHub authenticated via gh CLI" in out or "token configured" in out
+
+
+def _run_doctor_with_healthy_oauth_fallback(
+    monkeypatch,
+    tmp_path,
+    *,
+    env_key: str,
+    bad_key: str,
+    failing_host: str,
+    gemini_oauth_status: dict,
+    minimax_oauth_status: dict,
+) -> str:
+    home = tmp_path / ".hermes"
+    home.mkdir(parents=True, exist_ok=True)
+    (home / "config.yaml").write_text(
+        "model:\n"
+        "  provider: nous\n"
+        "  default: moonshotai/kimi-k2.6\n",
+        encoding="utf-8",
+    )
+    project = tmp_path / "project"
+    project.mkdir(exist_ok=True)
+
+    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+    monkeypatch.setenv(env_key, bad_key)
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("GEMINI_API_KEY", raising=False)
+    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
+    monkeypatch.delenv("MINIMAX_API_KEY", raising=False)
+    monkeypatch.delenv("MINIMAX_CN_API_KEY", raising=False)
+    monkeypatch.setenv(env_key, bad_key)
+
+    fake_model_tools = types.SimpleNamespace(
+        check_tool_availability=lambda *a, **kw: ([], []),
+        TOOLSET_REQUIREMENTS={},
+    )
+    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+    from hermes_cli import auth as _auth_mod
+
+    monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {"logged_in": True})
+    monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+    monkeypatch.setattr(_auth_mod, "get_gemini_oauth_auth_status", lambda: gemini_oauth_status)
+    monkeypatch.setattr(_auth_mod, "get_minimax_oauth_auth_status", lambda: minimax_oauth_status)
+
+    def fake_get(url, headers=None, timeout=None):
+        status = 401 if failing_host in url else 200
+        return types.SimpleNamespace(status_code=status)
+
+    import httpx
+
+    monkeypatch.setattr(httpx, "get", fake_get)
+
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        doctor_mod.run_doctor(Namespace(fix=False))
+    return buf.getvalue()
+
+
+@pytest.mark.parametrize(
+    ("env_key", "bad_key", "failing_host", "gemini_oauth_status", "minimax_oauth_status", "unexpected_issue"),
+    [
+        (
+            "GOOGLE_API_KEY",
+            "bad-gemini-key",
+            "googleapis.com",
+            {"logged_in": True, "email": "user@example.com"},
+            {},
+            "Check GOOGLE_API_KEY in .env",
+        ),
+        (
+            "MINIMAX_API_KEY",
+            "bad-minimax-key",
+            "minimax.io",
+            {},
+            {"logged_in": True, "region": "global"},
+            "Check MINIMAX_API_KEY in .env",
+        ),
+    ],
+)
+def test_run_doctor_ignores_invalid_direct_keys_when_oauth_fallback_is_healthy(
+    monkeypatch,
+    tmp_path,
+    env_key,
+    bad_key,
+    failing_host,
+    gemini_oauth_status,
+    minimax_oauth_status,
+    unexpected_issue,
+):
+    out = _run_doctor_with_healthy_oauth_fallback(
+        monkeypatch,
+        tmp_path,
+        env_key=env_key,
+        bad_key=bad_key,
+        failing_host=failing_host,
+        gemini_oauth_status=gemini_oauth_status,
+        minimax_oauth_status=minimax_oauth_status,
+    )
+
+    assert "invalid API key" in out
+    assert unexpected_issue not in out

From 7bb97b952f7edd51ce29ba9f3db4e255d6792c22 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sat, 16 May 2026 14:46:54 +0530
Subject: [PATCH 123/218] chore: add worlldz to AUTHOR_MAP for #26704 salvage

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 2e6bd6e6435..455467044a3 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -92,6 +92,7 @@ AUTHOR_MAP = {
     "30397170+1000Delta@users.noreply.github.com": "1000Delta",
     "szymonclawd@mac.home": "szymonclawd",
     "257759490+szymonclawd@users.noreply.github.com": "szymonclawd",
+    "101180447+worlldz@users.noreply.github.com": "worlldz",
     "zhanganzhe@tenclass.com": "luoyuctl",
     "51604064+luoyuctl@users.noreply.github.com": "luoyuctl",
     "127238744+teknium1@users.noreply.github.com": "teknium1",

From 4ded3ede334a7d5f8baa20f730bc8c5d3cdc399e Mon Sep 17 00:00:00 2001
From: konsisumer <der@konsi.org>
Date: Thu, 16 Apr 2026 05:10:28 +0200
Subject: [PATCH 124/218] fix: detect gh-copilot deprecation and improve GitHub
 Models 413 errors (#10648)

Address two blocking issues when using GitHub Copilot integrations:

1. ACP mode: detect the gh-copilot CLI deprecation error from stderr
   and surface an actionable message with alternatives instead of
   hanging or showing a cryptic error.

2. GitHub Models (Azure) 413: recognize models.inference.ai.azure.com
   as a known GitHub Models URL, and print a targeted hint explaining
   the hard 8K token limit that makes this endpoint incompatible with
   Hermes' system prompt size.
---
 agent/copilot_acp_client.py | 20 ++++++++++++++++++++
 agent/model_metadata.py     |  1 +
 hermes_cli/models.py        |  1 +
 run_agent.py                | 27 +++++++++++++++++++++++++++
 4 files changed, 49 insertions(+)

diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py
index 3643837bf5b..28a78bab13d 100644
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -30,6 +30,14 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
 _TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
 _TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
 
+# Patterns that indicate the gh-copilot CLI extension has been deprecated.
+_DEPRECATION_PATTERNS = (
+    "has been deprecated",
+    "no commands will be executed",
+    "deprecation",
+    "copilot-cli",
+)
+
 
 def _resolve_command() -> str:
     return (
@@ -506,6 +514,18 @@ class CopilotACPClient:
 
             stderr_text = "\n".join(stderr_tail).strip()
             if proc.poll() is not None and stderr_text:
+                stderr_lower = stderr_text.lower()
+                if any(pat in stderr_lower for pat in _DEPRECATION_PATTERNS):
+                    raise RuntimeError(
+                        "The gh-copilot CLI extension has been deprecated by GitHub and "
+                        "can no longer be used for ACP mode.\n\n"
+                        "Alternatives:\n"
+                        "  1. Use the GitHub Copilot provider instead of ACP mode:\n"
+                        "     hermes setup  →  select 'GitHub Copilot' (uses Copilot Chat API)\n"
+                        "  2. Set HERMES_COPILOT_ACP_COMMAND to point to a compatible ACP server\n"
+                        "  3. Use a different provider (e.g. OpenAI, Anthropic, Nous)\n\n"
+                        f"Original error:\n{stderr_text}"
+                    )
                 raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
             raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
 
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 41e229416c9..8146cd97aa4 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -358,6 +358,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
     "api.deepseek.com": "deepseek",
     "api.githubcopilot.com": "copilot",
     "models.github.ai": "copilot",
+    "models.inference.ai.azure.com": "github-models",
     "api.fireworks.ai": "fireworks",
     "opencode.ai": "opencode-go",
     "api.x.ai": "xai",
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index ded3f448f87..336e220814e 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -2525,6 +2525,7 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
     return (
         normalized.startswith(COPILOT_BASE_URL)
         or normalized.startswith("https://models.github.ai/inference")
+        or normalized.startswith("https://models.inference.ai.azure.com")
     )
 
 
diff --git a/run_agent.py b/run_agent.py
index 310777076cb..4bae16685cb 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -14184,6 +14184,33 @@ class AIAgent:
                             "interrupted": True,
                         }
                     
+                    # Actionable hint for GitHub Models (Azure) 413 errors.
+                    # The free tier enforces a hard 8K token limit per request,
+                    # which Hermes' system prompt alone can exceed.  Compression
+                    # won't help — surface a clear message so the user doesn't
+                    # wait through three futile compression attempts.
+                    if (
+                        status_code == 413
+                        and isinstance(_base, str)
+                        and "models.inference.ai.azure.com" in _base
+                    ):
+                        self._vprint(
+                            f"{self.log_prefix}   💡 GitHub Models (Azure) enforces a hard per-request token limit (often 8K).",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}      Hermes' system prompt alone may exceed this limit.  This endpoint is not",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}      compatible with Hermes Agent.  Use https://models.github.ai or the GitHub",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}      Copilot provider instead, which have higher token limits.",
+                            force=True,
+                        )
+
                     # Check for 413 payload-too-large BEFORE generic 4xx handler.
                     # A 413 is a payload-size error — the correct response is to
                     # compress history and retry, not abort immediately.

From b85b938b1fe74ecf16dc22e4448ecbab49660727 Mon Sep 17 00:00:00 2001
From: konsisumer <der@konsi.org>
Date: Thu, 16 Apr 2026 06:08:54 +0200
Subject: [PATCH 125/218] test: add tests for copilot ACP deprecation detection
 and Azure URL mapping

Cover the deprecation pattern matching against real gh-copilot stderr
output, verify the GitHub Models Azure URL is in _URL_TO_PROVIDER, and
confirm _is_github_models_base_url recognises the Azure endpoint.
---
 tests/agent/test_copilot_acp_deprecation.py | 59 +++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 tests/agent/test_copilot_acp_deprecation.py

diff --git a/tests/agent/test_copilot_acp_deprecation.py b/tests/agent/test_copilot_acp_deprecation.py
new file mode 100644
index 00000000000..033ede0c183
--- /dev/null
+++ b/tests/agent/test_copilot_acp_deprecation.py
@@ -0,0 +1,59 @@
+"""Tests for gh-copilot CLI deprecation detection and GitHub Models Azure URL mapping."""
+
+import pytest
+
+from agent.copilot_acp_client import _DEPRECATION_PATTERNS
+
+
+class TestDeprecationPatternDetection:
+    """Verify that stderr messages from a deprecated gh-copilot CLI are caught."""
+
+    _REAL_DEPRECATION_STDERR = (
+        "The gh-copilot extension has been deprecated in favor of the newer "
+        "GitHub Copilot CLI.\nFor more information, visit:\n"
+        "- Copilot CLI: https://github.com/github/copilot-cli\n"
+        "- Deprecation announcement: https://github.blog/changelog/"
+        "2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension\n"
+        "No commands will be executed."
+    )
+
+    def test_real_deprecation_message_matches(self):
+        lower = self._REAL_DEPRECATION_STDERR.lower()
+        assert any(pat in lower for pat in _DEPRECATION_PATTERNS)
+
+    @pytest.mark.parametrize(
+        "stderr_line",
+        [
+            "The gh-copilot extension has been deprecated",
+            "No commands will be executed.",
+            "See deprecation notice at ...",
+            "Install copilot-cli instead",
+        ],
+    )
+    def test_individual_patterns_match(self, stderr_line: str):
+        lower = stderr_line.lower()
+        assert any(pat in lower for pat in _DEPRECATION_PATTERNS)
+
+    def test_normal_stderr_does_not_match(self):
+        normal = "Error: connection refused"
+        assert not any(pat in normal.lower() for pat in _DEPRECATION_PATTERNS)
+
+
+class TestGitHubModelsAzureUrl:
+    """Verify that the Azure GitHub Models URL is recognised."""
+
+    def test_url_to_provider_contains_azure_models(self):
+        from agent.model_metadata import _URL_TO_PROVIDER
+
+        assert _URL_TO_PROVIDER.get("models.inference.ai.azure.com") == "github-models"
+
+    def test_is_github_models_base_url_recognises_azure(self):
+        from hermes_cli.models import _is_github_models_base_url
+
+        assert _is_github_models_base_url("https://models.inference.ai.azure.com")
+        assert _is_github_models_base_url("https://models.inference.ai.azure.com/v1/chat")
+
+    def test_is_github_models_base_url_still_recognises_github_ai(self):
+        from hermes_cli.models import _is_github_models_base_url
+
+        assert _is_github_models_base_url("https://models.github.ai/inference")

From 374dc81c2359a6f61e8d1efc49de29d61d7b9a88 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 01:58:13 -0700
Subject: [PATCH 126/218] fix(copilot-acp): tighten deprecation detection +
 sharpen GitHub Models 413 hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up improvements on top of @konsisumer's cherry-picked fix for #10648:

1. Deprecation patterns required BOTH a product fingerprint ('gh-copilot') and
   a deprecation marker. The previous list included 'copilot-cli' and bare
   'deprecation', which would false-positive on stderr from the NEW
   @github/copilot CLI — whose repo is literally github.com/github/copilot-cli
   and which legitimately surfaces those substrings in its own messages.

2. Replace the deprecation hint. The user in #10648 installed
   'gh extension install github/gh-copilot' (the deprecated extension)
   thinking that's what ACP mode uses, when ACP actually spawns the new
   'copilot' binary from '@github/copilot'. The hint now points users at the
   correct install command ('npm install -g @github/copilot') with the new
   CLI's repo URL, and demotes provider-switching to a fallback alternative.

3. Change _URL_TO_PROVIDER value for models.inference.ai.azure.com from the
   'github-models' alias to the canonical 'copilot' provider id, matching the
   convention used by every other entry in the table.

4. Sharpen the 413 hint message. The free tier's ~8K cap is below the
   system-prompt floor, so this endpoint is fundamentally incompatible with
   an agentic loop — not a 'use a different URL' problem.

Tests:
- New parametrized false-positive coverage for the new CLI's stderr shape.
- Updated assertion to require canonical 'copilot' provider mapping.
- All 14 deprecation/URL tests pass.
---
 agent/copilot_acp_client.py                 | 43 ++++++++++++------
 agent/model_metadata.py                     |  7 ++-
 run_agent.py                                | 22 +++++----
 tests/agent/test_copilot_acp_deprecation.py | 50 ++++++++++++++-------
 4 files changed, 84 insertions(+), 38 deletions(-)

diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py
index 28a78bab13d..f1bff1a7190 100644
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -30,15 +30,29 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
 _TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
 _TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
 
-# Patterns that indicate the gh-copilot CLI extension has been deprecated.
-_DEPRECATION_PATTERNS = (
+# Stderr fingerprint of the deprecated `gh copilot` CLI extension
+# (https://github.blog/changelog/2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension).
+# We require BOTH the literal product name ("gh-copilot") AND a deprecation
+# marker, so generic stderr from the NEW `@github/copilot` CLI — whose repo
+# is github.com/github/copilot-cli and which legitimately mentions "copilot-cli"
+# in its own banners and error messages — doesn't get misclassified as the
+# deprecated extension.
+_DEPRECATION_REQUIRED = ("gh-copilot",)
+_DEPRECATION_MARKERS = (
     "has been deprecated",
     "no commands will be executed",
-    "deprecation",
-    "copilot-cli",
 )
 
 
+def _is_gh_copilot_deprecation_message(stderr_text: str) -> bool:
+    """True iff stderr looks like the deprecated gh-copilot extension's banner."""
+
+    lower = stderr_text.lower()
+    if not any(req in lower for req in _DEPRECATION_REQUIRED):
+        return False
+    return any(marker in lower for marker in _DEPRECATION_MARKERS)
+
+
 def _resolve_command() -> str:
     return (
         os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
@@ -514,16 +528,19 @@ class CopilotACPClient:
 
             stderr_text = "\n".join(stderr_tail).strip()
             if proc.poll() is not None and stderr_text:
-                stderr_lower = stderr_text.lower()
-                if any(pat in stderr_lower for pat in _DEPRECATION_PATTERNS):
+                if _is_gh_copilot_deprecation_message(stderr_text):
                     raise RuntimeError(
-                        "The gh-copilot CLI extension has been deprecated by GitHub and "
-                        "can no longer be used for ACP mode.\n\n"
-                        "Alternatives:\n"
-                        "  1. Use the GitHub Copilot provider instead of ACP mode:\n"
-                        "     hermes setup  →  select 'GitHub Copilot' (uses Copilot Chat API)\n"
-                        "  2. Set HERMES_COPILOT_ACP_COMMAND to point to a compatible ACP server\n"
-                        "  3. Use a different provider (e.g. OpenAI, Anthropic, Nous)\n\n"
+                        "Hermes ACP mode requires the NEW GitHub Copilot CLI "
+                        "(github.com/github/copilot-cli), but the binary it just "
+                        "spawned is the deprecated `gh copilot` extension.\n\n"
+                        "Install the new CLI:\n"
+                        "  npm install -g @github/copilot\n"
+                        "  # then verify with: copilot --help\n\n"
+                        "If `copilot` already resolves to the new CLI but you still see this,\n"
+                        "point Hermes at it explicitly:\n"
+                        "  export HERMES_COPILOT_ACP_COMMAND=/path/to/new/copilot\n\n"
+                        "Alternative: use the `copilot` provider (no ACP, hits the Copilot API\n"
+                        "directly with a Copilot subscription token) via `hermes setup`.\n\n"
                         f"Original error:\n{stderr_text}"
                     )
                 raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 8146cd97aa4..26a844ccb92 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -358,7 +358,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
     "api.deepseek.com": "deepseek",
     "api.githubcopilot.com": "copilot",
     "models.github.ai": "copilot",
-    "models.inference.ai.azure.com": "github-models",
+    # GitHub Models free tier (Azure-hosted prototyping endpoint) — same
+    # canonical provider as the Copilot API.  Hard per-request token cap
+    # (often 8K) makes it unusable for Hermes' system prompt, but mapping
+    # it here lets us recognize the endpoint and emit a targeted hint
+    # instead of falling through the unknown-custom-endpoint path.
+    "models.inference.ai.azure.com": "copilot",
     "api.fireworks.ai": "fireworks",
     "opencode.ai": "opencode-go",
     "api.x.ai": "xai",
diff --git a/run_agent.py b/run_agent.py
index 4bae16685cb..1dd4219b22e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -14185,29 +14185,35 @@ class AIAgent:
                         }
                     
                     # Actionable hint for GitHub Models (Azure) 413 errors.
-                    # The free tier enforces a hard 8K token limit per request,
-                    # which Hermes' system prompt alone can exceed.  Compression
-                    # won't help — surface a clear message so the user doesn't
-                    # wait through three futile compression attempts.
+                    # The free tier enforces a hard 8K token cap per request,
+                    # which Hermes' system prompt + tool schemas alone exceed.
+                    # Compression can't help — the floor is the system prompt
+                    # itself, not the conversation — so surface a clear "not
+                    # compatible" message instead of looping into three futile
+                    # compression attempts.
                     if (
                         status_code == 413
                         and isinstance(_base, str)
                         and "models.inference.ai.azure.com" in _base
                     ):
                         self._vprint(
-                            f"{self.log_prefix}   💡 GitHub Models (Azure) enforces a hard per-request token limit (often 8K).",
+                            f"{self.log_prefix}   💡 GitHub Models free tier (models.inference.ai.azure.com) caps every",
                             force=True,
                         )
                         self._vprint(
-                            f"{self.log_prefix}      Hermes' system prompt alone may exceed this limit.  This endpoint is not",
+                            f"{self.log_prefix}      request at ~8K tokens. Hermes' system prompt + tool schemas baseline",
                             force=True,
                         )
                         self._vprint(
-                            f"{self.log_prefix}      compatible with Hermes Agent.  Use https://models.github.ai or the GitHub",
+                            f"{self.log_prefix}      exceeds that floor, so this endpoint cannot run an agentic loop.",
                             force=True,
                         )
                         self._vprint(
-                            f"{self.log_prefix}      Copilot provider instead, which have higher token limits.",
+                            f"{self.log_prefix}      Use the `copilot` provider with a Copilot subscription token (`hermes",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}      setup` → GitHub Copilot), or pick any other provider.",
                             force=True,
                         )
 
diff --git a/tests/agent/test_copilot_acp_deprecation.py b/tests/agent/test_copilot_acp_deprecation.py
index 033ede0c183..a0da7736732 100644
--- a/tests/agent/test_copilot_acp_deprecation.py
+++ b/tests/agent/test_copilot_acp_deprecation.py
@@ -2,11 +2,12 @@
 
 import pytest
 
-from agent.copilot_acp_client import _DEPRECATION_PATTERNS
+from agent.copilot_acp_client import _is_gh_copilot_deprecation_message
 
 
 class TestDeprecationPatternDetection:
-    """Verify that stderr messages from a deprecated gh-copilot CLI are caught."""
+    """Verify that stderr from the deprecated `gh copilot` extension is caught
+    without false-positiving on the new `@github/copilot` CLI."""
 
     _REAL_DEPRECATION_STDERR = (
         "The gh-copilot extension has been deprecated in favor of the newer "
@@ -18,25 +19,40 @@ class TestDeprecationPatternDetection:
     )
 
     def test_real_deprecation_message_matches(self):
-        lower = self._REAL_DEPRECATION_STDERR.lower()
-        assert any(pat in lower for pat in _DEPRECATION_PATTERNS)
+        assert _is_gh_copilot_deprecation_message(self._REAL_DEPRECATION_STDERR)
 
     @pytest.mark.parametrize(
-        "stderr_line",
+        "stderr_text",
         [
-            "The gh-copilot extension has been deprecated",
-            "No commands will be executed.",
-            "See deprecation notice at ...",
-            "Install copilot-cli instead",
+            # The deprecation banner uses both halves of the fingerprint.
+            "The gh-copilot extension has been deprecated.",
+            "gh-copilot: no commands will be executed.",
+            # Mixed casing — match is case-insensitive.
+            "The GH-Copilot Extension HAS BEEN DEPRECATED.",
         ],
     )
-    def test_individual_patterns_match(self, stderr_line: str):
-        lower = stderr_line.lower()
-        assert any(pat in lower for pat in _DEPRECATION_PATTERNS)
+    def test_genuine_deprecation_variants_match(self, stderr_text: str):
+        assert _is_gh_copilot_deprecation_message(stderr_text)
 
-    def test_normal_stderr_does_not_match(self):
-        normal = "Error: connection refused"
-        assert not any(pat in normal.lower() for pat in _DEPRECATION_PATTERNS)
+    @pytest.mark.parametrize(
+        "stderr_text",
+        [
+            # Generic errors — no fingerprint at all.
+            "Error: connection refused",
+            "",
+            # The NEW @github/copilot CLI's repo is github.com/github/copilot-cli.
+            # Its stderr can legitimately mention "copilot-cli" or "deprecation"
+            # in unrelated contexts; neither alone should trip the detector.
+            "copilot-cli: failed to authenticate with the API",
+            "warning: the --foo flag is scheduled for deprecation in v3",
+            "See https://github.com/github/copilot-cli/issues for support",
+            # Half the fingerprint without the other half.
+            "gh-copilot: command not found",
+            "extension has been deprecated (some other extension)",
+        ],
+    )
+    def test_does_not_false_positive(self, stderr_text: str):
+        assert not _is_gh_copilot_deprecation_message(stderr_text)
 
 
 class TestGitHubModelsAzureUrl:
@@ -45,7 +61,9 @@ class TestGitHubModelsAzureUrl:
     def test_url_to_provider_contains_azure_models(self):
         from agent.model_metadata import _URL_TO_PROVIDER
 
-        assert _URL_TO_PROVIDER.get("models.inference.ai.azure.com") == "github-models"
+        # Maps to the canonical "copilot" provider (same convention as the
+        # other GitHub-family entries) — not the "github-models" alias.
+        assert _URL_TO_PROVIDER.get("models.inference.ai.azure.com") == "copilot"
 
     def test_is_github_models_base_url_recognises_azure(self):
         from hermes_cli.models import _is_github_models_base_url

From 585d6b64305ab94773a129880450d2ee3d362bbc Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 02:25:41 -0700
Subject: [PATCH 127/218] fix(gateway): merge rapid TEXT follow-ups during
 active sessions (#4469) (#26822)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the agent is running and the user sends multiple TEXT messages in
rapid succession, base.py's active-session branch stored the pending
event as a single-slot replacement:

    self._pending_messages[session_key] = event

Three rapid messages A, B, C landed as: A (interrupts), B (replaces A
before consumer reads), C (replaces B). Only C reached the next turn —
A and B were silently dropped. This is the symptom in #4469.

Route the follow-up through merge_pending_message_event(..., merge_text=True)
so TEXT events accumulate into the existing pending event's text instead
of clobbering it. Photo and media bursts already merged through the same
helper; this just extends the merge_text path (already used by the
Telegram bursty-grace branch in gateway/run.py) to all platforms.

Test exercises BasePlatformAdapter.handle_message directly with the
session marked active and asserts three rapid TEXT events merge to
'part two\\npart three' rather than dropping the middle message.
Sanity-checked the test would fail without the fix.

Credits @devorun for the original investigation and analysis in #4491
that surfaced the underlying queue handling, though their fix targeted
GatewayRunner._pending_messages which is now dead state on main.
---
 gateway/platforms/base.py                     |  20 ++-
 .../gateway/test_active_session_text_merge.py | 152 ++++++++++++++++++
 2 files changed, 170 insertions(+), 2 deletions(-)
 create mode 100644 tests/gateway/test_active_session_text_merge.py

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index d03bc282ed3..c6bdc38c3b9 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -2961,9 +2961,25 @@ class BasePlatformAdapter(ABC):
                 merge_pending_message_event(self._pending_messages, session_key, event)
                 return  # Don't interrupt now - will run after current task completes
 
-            # Default behavior for non-photo follow-ups: interrupt the running agent
+            # Default behavior for non-photo follow-ups: interrupt the running agent.
+            #
+            # Use merge_text=True so rapid TEXT follow-ups (#4469) accumulate
+            # into the single pending slot instead of clobbering each other.
+            # Without merging, three rapid messages "A", "B", "C" land like:
+            #   _pending_messages[k] = A  (interrupts)
+            #   _pending_messages[k] = B  (replaces A before consumer reads)
+            #   _pending_messages[k] = C  (replaces B)
+            # ...and only "C" reaches the next turn.  merge_pending_message_event
+            # already does the right thing for photo/media bursts; the
+            # ``merge_text=True`` flag extends that to plain TEXT events.
+            # Same shape as the Telegram bursty-grace path in gateway/run.py.
             logger.debug("[%s] New message while session %s is active — triggering interrupt", self.name, session_key)
-            self._pending_messages[session_key] = event
+            merge_pending_message_event(
+                self._pending_messages,
+                session_key,
+                event,
+                merge_text=True,
+            )
             # Signal the interrupt (the processing task checks this)
             self._active_sessions[session_key].set()
             return  # Don't process now - will be handled after current task finishes
diff --git a/tests/gateway/test_active_session_text_merge.py b/tests/gateway/test_active_session_text_merge.py
new file mode 100644
index 00000000000..087f8dbabd0
--- /dev/null
+++ b/tests/gateway/test_active_session_text_merge.py
@@ -0,0 +1,152 @@
+"""Regression test for #4469.
+
+When the agent is actively running (session present in
+``adapter._active_sessions``) and the user fires off multiple TEXT
+follow-ups in rapid succession, the previous behaviour was a single-slot
+replacement at ``gateway/platforms/base.py``:
+
+    self._pending_messages[session_key] = event
+
+So three rapid messages ``A``, ``B``, ``C`` arriving while the agent was
+still working on the initial turn produced a pending slot containing only
+``C``; ``A`` and ``B`` were silently dropped.
+
+The fix routes the follow-up through ``merge_pending_message_event(...,
+merge_text=True)`` so TEXT events accumulate into the existing pending
+event's text instead of clobbering it.  Photo / media bursts continue to
+merge through the same helper (they always did).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import sys
+import types
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+# Minimal telegram stub so importing gateway.platforms.base does not pull
+# in the real python-telegram-bot dependency.
+_tg = sys.modules.get("telegram") or types.ModuleType("telegram")
+_tg.constants = sys.modules.get("telegram.constants") or types.ModuleType("telegram.constants")
+_ct = MagicMock()
+_ct.PRIVATE = "private"
+_ct.GROUP = "group"
+_ct.SUPERGROUP = "supergroup"
+_tg.constants.ChatType = _ct
+sys.modules.setdefault("telegram", _tg)
+sys.modules.setdefault("telegram.constants", _tg.constants)
+sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+)
+from gateway.session import SessionSource, build_session_key
+
+
+def _make_event(text: str, chat_id: str = "12345") -> MessageEvent:
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id=chat_id,
+        chat_type="dm",
+        user_id="u1",
+    )
+    return MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id=f"msg-{text[:8]}",
+    )
+
+
+def _make_adapter() -> BasePlatformAdapter:
+    """Build a BasePlatformAdapter without running its heavy __init__.
+
+    We only need the bits ``handle_message`` touches on the active-session
+    path: ``_active_sessions``, ``_pending_messages``,
+    ``_message_handler``, ``_busy_session_handler``, ``config``, ``platform``.
+    """
+
+    class _DummyAdapter(BasePlatformAdapter):  # type: ignore[misc]
+        async def connect(self):
+            pass
+
+        async def disconnect(self):
+            pass
+
+        async def get_chat_info(self, chat_id):
+            return None
+
+        async def send(self, *args, **kwargs):
+            return MagicMock(success=True, message_id="x", retryable=False)
+
+    adapter = object.__new__(_DummyAdapter)
+    adapter.config = PlatformConfig(enabled=True, token="***")
+    adapter.platform = Platform.TELEGRAM
+    adapter._message_handler = AsyncMock(return_value=None)
+    adapter._busy_session_handler = None
+    adapter._active_sessions = {}
+    adapter._pending_messages = {}
+    adapter._session_tasks = {}
+    adapter._background_tasks = set()
+    adapter._post_delivery_callbacks = {}
+    adapter._expected_cancelled_tasks = set()
+    adapter._fatal_error_code = None
+    adapter._fatal_error_message = None
+    adapter._fatal_error_retryable = True
+    adapter._fatal_error_handler = None
+    adapter._running = True
+    adapter._auto_tts_default = False
+    adapter._auto_tts_enabled_chats = set()
+    adapter._auto_tts_disabled_chats = set()
+    adapter._typing_paused = set()
+    return adapter
+
+
+@pytest.mark.asyncio
+async def test_rapid_text_followups_accumulate_instead_of_replacing():
+    """Three rapid TEXT follow-ups during an active session must all
+    survive in ``adapter._pending_messages[session_key].text``."""
+    adapter = _make_adapter()
+    first = _make_event("part one")
+    session_key = build_session_key(first.source)
+
+    # Mark the session as active so subsequent messages take the
+    # "already running" branch in handle_message.
+    adapter._active_sessions[session_key] = asyncio.Event()
+
+    second = _make_event("part two")
+    third = _make_event("part three")
+
+    await adapter.handle_message(second)
+    await adapter.handle_message(third)
+
+    # Both rapid follow-ups must be preserved, not just the last one.
+    pending = adapter._pending_messages[session_key]
+    assert pending.text == "part two\npart three", (
+        f"expected accumulated text, got {pending.text!r}"
+    )
+    # Interrupt event must be signalled exactly like before.
+    assert adapter._active_sessions[session_key].is_set()
+
+
+@pytest.mark.asyncio
+async def test_single_followup_is_stored_as_is():
+    """One TEXT follow-up still lands as the event object itself
+    (no spurious wrapping / mutation) — guards against the merge path
+    breaking the simple case."""
+    adapter = _make_adapter()
+    first = _make_event("only one")
+    session_key = build_session_key(first.source)
+
+    adapter._active_sessions[session_key] = asyncio.Event()
+    await adapter.handle_message(first)
+
+    pending = adapter._pending_messages[session_key]
+    assert pending is first
+    assert pending.text == "only one"
+    assert adapter._active_sessions[session_key].is_set()

From fcd9011f8d02d30d5f80db1749cbeb8f2d1b3fc3 Mon Sep 17 00:00:00 2001
From: JunghwanNA <70629228+shaun0927@users.noreply.github.com>
Date: Thu, 16 Apr 2026 12:26:10 +0900
Subject: [PATCH 128/218] fix(security): separate OAuth PKCE state from
 code_verifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PKCE flow reused the code_verifier as the OAuth state parameter.
Per RFC 6749 §10.12 and RFC 7636, these serve different purposes:
state is an anti-CSRF token visible in the authorization URL; the
code_verifier must remain secret for the token exchange.

Generate an independent secrets.token_urlsafe(32) for state and
validate it on callback to provide actual CSRF protection.

Closes #10693
---
 agent/anthropic_adapter.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 4b1134a4c0b..ccb61dc813e 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -1064,6 +1064,8 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
     import webbrowser
 
     verifier, challenge = _generate_pkce()
+    import secrets as _secrets
+    oauth_state = _secrets.token_urlsafe(32)
 
     params = {
         "code": "true",
@@ -1073,7 +1075,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
         "scope": _OAUTH_SCOPES,
         "code_challenge": challenge,
         "code_challenge_method": "S256",
-        "state": verifier,
+        "state": oauth_state,
     }
     from urllib.parse import urlencode
 
@@ -1110,7 +1112,12 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
 
     splits = auth_code.split("#")
     code = splits[0]
-    state = splits[1] if len(splits) > 1 else ""
+    received_state = splits[1] if len(splits) > 1 else ""
+
+    # Validate state to prevent CSRF (RFC 6749 §10.12)
+    if received_state != oauth_state:
+        logger.warning("OAuth state mismatch — possible CSRF, aborting")
+        return None
 
     try:
         import urllib.request
@@ -1119,7 +1126,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
             "grant_type": "authorization_code",
             "client_id": _OAUTH_CLIENT_ID,
             "code": code,
-            "state": state,
+            "state": received_state,
             "redirect_uri": _OAUTH_REDIRECT_URI,
             "code_verifier": verifier,
         }).encode()

From 345821b4a1d612bc56cabb548b91b35a76bc3692 Mon Sep 17 00:00:00 2001
From: JunghwanNA <70629228+shaun0927@users.noreply.github.com>
Date: Thu, 16 Apr 2026 14:04:22 +0900
Subject: [PATCH 129/218] style: move secrets import alongside other
 function-level imports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Group the secrets import with time and webbrowser at the top of
run_hermes_oauth_login_pure(), matching the existing pattern.
Drop the _secrets alias — no name conflict in this scope.
---
 agent/anthropic_adapter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index ccb61dc813e..e7e1a8acb6d 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -1060,12 +1060,12 @@ def _generate_pkce() -> tuple:
 
 def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
     """Run Hermes-native OAuth PKCE flow and return credential state."""
+    import secrets
     import time
     import webbrowser
 
     verifier, challenge = _generate_pkce()
-    import secrets as _secrets
-    oauth_state = _secrets.token_urlsafe(32)
+    oauth_state = secrets.token_urlsafe(32)
 
     params = {
         "code": "true",

From 72f94f4a7c281f2ac2a944a20eb615f517f64fe8 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 02:27:41 -0700
Subject: [PATCH 130/218] test(security): regression guard for OAuth PKCE
 state/verifier separation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two unit tests for run_hermes_oauth_login_pure():

1. test_authorization_url_state_is_not_pkce_verifier — asserts state in the
   auth URL is independent from the PKCE code_verifier sent in the token
   exchange, and that the verifier never appears in the URL.

2. test_callback_state_mismatch_aborts — asserts the flow returns None
   (no token exchange) when the callback state does not match the value
   we generated.

Negative control verified: reintroducing the b17e5c10 vulnerable pattern
(state = verifier, no callback validation) makes both tests fail.

Also adds AUTHOR_MAP entry for shaun0927 (contributor of the fix).
---
 scripts/release.py                       |   1 +
 tests/agent/test_anthropic_oauth_pkce.py | 170 +++++++++++++++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 tests/agent/test_anthropic_oauth_pkce.py

diff --git a/scripts/release.py b/scripts/release.py
index 455467044a3..18d5a46123a 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -59,6 +59,7 @@ AUTHOR_MAP = {
     "m@mobrienv.dev": "mikeyobrien",
     "qiyin.zuo@pcitc.com": "qiyin-code",
     "mr.aashiz@gmail.com": "aashizpoudel",
+    "70629228+shaun0927@users.noreply.github.com": "shaun0927",
     "98262967+Bihruze@users.noreply.github.com": "Bihruze",
     "nidhi2894@gmail.com": "nidhi-singh02",
     "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
diff --git a/tests/agent/test_anthropic_oauth_pkce.py b/tests/agent/test_anthropic_oauth_pkce.py
new file mode 100644
index 00000000000..5cf74d7a6a5
--- /dev/null
+++ b/tests/agent/test_anthropic_oauth_pkce.py
@@ -0,0 +1,170 @@
+"""Regression tests for the Anthropic OAuth PKCE flow.
+
+Guards against re-introducing the bug where the PKCE ``code_verifier`` was
+reused as the OAuth ``state`` parameter, leaking the verifier via the
+authorization URL (browser history, Referer headers, auth-server logs) and
+removing CSRF protection on the callback path.
+
+History:
+  - PR #1775 first fixed this on ``run_hermes_oauth_login()``.
+  - PR #2647 (b17e5c10) added ``run_hermes_oauth_login_pure()`` and silently
+    copy-pasted the pre-#1775 vulnerable pattern.
+  - PR #3107 removed the old function, leaving only the regressed copy.
+  - PR #10699 (issue #10693) fixed the regression on the surviving function.
+"""
+
+from __future__ import annotations
+
+import io
+import json
+from typing import Any, Dict
+from urllib.parse import parse_qs, urlparse
+
+
+def _patch_oauth_flow(
+    monkeypatch,
+    *,
+    callback_code: str,
+    token_response: Dict[str, Any] | None = None,
+    capture_token_request: Dict[str, Any] | None = None,
+    capture_auth_url: Dict[str, str] | None = None,
+) -> None:
+    """Wire up monkeypatches that let ``run_hermes_oauth_login_pure()`` run
+    end-to-end without touching a real browser, stdin, or HTTP endpoint.
+
+    ``callback_code`` is the literal string the user would paste back into the
+    terminal (``"<code>#<state>"`` format).
+    ``capture_token_request`` and ``capture_auth_url`` are out-dict captures
+    so the test can introspect what was sent to the auth URL and the token
+    endpoint, respectively.
+    """
+    import urllib.request
+
+    if token_response is None:
+        token_response = {
+            "access_token": "sk-ant-test-access",
+            "refresh_token": "sk-ant-test-refresh",
+            "expires_in": 3600,
+        }
+
+    def fake_open(url):
+        if capture_auth_url is not None:
+            capture_auth_url["url"] = url
+        return True
+
+    monkeypatch.setattr("webbrowser.open", fake_open)
+    monkeypatch.setattr("builtins.input", lambda *_a, **_kw: callback_code)
+
+    class _FakeResponse:
+        def __init__(self, body: bytes) -> None:
+            self._body = body
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *_exc):
+            return False
+
+        def read(self):
+            return self._body
+
+    def fake_urlopen(req, *_a, **_kw):
+        if capture_token_request is not None:
+            capture_token_request["url"] = req.full_url
+            capture_token_request["data"] = json.loads(req.data.decode())
+            capture_token_request["headers"] = dict(req.headers)
+        return _FakeResponse(json.dumps(token_response).encode())
+
+    monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)
+
+
+def test_authorization_url_state_is_not_pkce_verifier(monkeypatch, tmp_path):
+    """The ``state`` parameter in the authorization URL must NOT equal the
+    PKCE ``code_verifier``.
+
+    Reusing the verifier as state leaks the verifier into browser history,
+    Referer headers, and auth-server access logs — defeating RFC 7636.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    captured_url: Dict[str, str] = {}
+    captured_token: Dict[str, Any] = {}
+    _patch_oauth_flow(
+        monkeypatch,
+        # state echoed back unchanged so the CSRF guard passes
+        callback_code="auth-code-from-anthropic#PLACEHOLDER",
+        capture_auth_url=captured_url,
+        capture_token_request=captured_token,
+    )
+
+    # Stub the callback parse: we need the state echoed back to match. To do
+    # that without hardcoding the state value, override input() AFTER seeing
+    # the auth URL.
+    import builtins
+
+    real_input_calls = {"count": 0}
+
+    def fake_input(*_a, **_kw):
+        real_input_calls["count"] += 1
+        # First (and only) call is the "Authorization code:" prompt.
+        url = captured_url.get("url", "")
+        qs = parse_qs(urlparse(url).query)
+        state = qs.get("state", [""])[0]
+        return f"auth-code-from-anthropic#{state}"
+
+    monkeypatch.setattr(builtins, "input", fake_input)
+
+    from agent.anthropic_adapter import run_hermes_oauth_login_pure
+
+    result = run_hermes_oauth_login_pure()
+    assert result is not None, "OAuth flow should succeed with matching state"
+
+    url = captured_url["url"]
+    qs = parse_qs(urlparse(url).query)
+
+    assert "state" in qs and qs["state"][0], "authorization URL must include state"
+    assert "code_challenge" in qs, "authorization URL must include code_challenge"
+
+    state_in_url = qs["state"][0]
+    verifier_sent = captured_token["data"]["code_verifier"]
+
+    # The whole point: state and verifier must be independent values.
+    assert state_in_url != verifier_sent, (
+        "PKCE code_verifier was reused as OAuth state — regression of #10693 / "
+        "#1775. The verifier is supposed to be a secret known only to the "
+        "client; placing it in the authorization URL leaks it via browser "
+        "history, Referer headers, and auth-server logs."
+    )
+
+    # And the verifier MUST NOT appear anywhere in the URL.
+    assert verifier_sent not in url, (
+        "PKCE verifier leaked into authorization URL — regression of #10693"
+    )
+
+
+def test_callback_state_mismatch_aborts(monkeypatch, tmp_path, caplog):
+    """If the state returned in the callback does not match the one we sent
+    in the authorization URL, the flow must abort before exchanging the code.
+
+    Without this check, an attacker who tricks the user into pasting a
+    crafted ``<code>#<state>`` string can complete the token exchange — the
+    CSRF protection that ``state`` is supposed to provide (RFC 6749 §10.12)
+    would be absent.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    captured_token: Dict[str, Any] = {}
+    _patch_oauth_flow(
+        monkeypatch,
+        callback_code="attacker-code#attacker-state-does-not-match",
+        capture_token_request=captured_token,
+    )
+
+    from agent.anthropic_adapter import run_hermes_oauth_login_pure
+
+    result = run_hermes_oauth_login_pure()
+
+    assert result is None, "mismatched state must abort the flow"
+    assert "url" not in captured_token, (
+        "token exchange must NOT happen when state mismatches"
+    )

From a91a57fa5a13d516c38b07a141a9ce8a3daabeb0 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 02:58:57 -0700
Subject: [PATCH 131/218] chore: release v0.14.0 (2026.5.16) (#26862)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Foundation Release — Hermes installs and runs anywhere now.

Highlights:
- Native Windows support (early beta) — PowerShell installer, native subprocess/PTY paths, ~40 follow-up Windows-only fixes
- pip install hermes-agent — PyPI wheel
- Cold-start wave — ~19s off hermes launch, 180x faster browser_console (CDP WS)
- Supply-chain advisory checker + lazy-deps + tiered install fallback
- OpenAI-compatible local proxy for OAuth providers (Claude Pro, ChatGPT Pro, SuperGrok)
- Cross-session 1h Claude prompt cache (Anthropic / OpenRouter / Nous Portal)
- 2 new platforms: LINE + SimpleX Chat (22 total)
- Microsoft Graph foundation — Teams pipeline + webhook adapter
- /handoff actually transfers sessions live
- x_search first-class tool, vision_analyze pixel passthrough
- LSP semantic diagnostics on every write
- Unified video_generate with pluggable backends
- computer_use cua-driver backend
- 9 new optional skills, OpenRouter Pareto Code router, xAI Grok OAuth
- 12 P0 + 50 P1 closures

808 commits · 633 PRs · 1393 files · 165k insertions · 545 issues closed · 215 contributors
---
 RELEASE_v0.14.0.md     | 477 +++++++++++++++++++++++++++++++++++++++++
 hermes_cli/__init__.py |   4 +-
 pyproject.toml         |  21 +-
 scripts/release.py     |   5 +
 4 files changed, 492 insertions(+), 15 deletions(-)
 create mode 100644 RELEASE_v0.14.0.md

diff --git a/RELEASE_v0.14.0.md b/RELEASE_v0.14.0.md
new file mode 100644
index 00000000000..38d40db8c69
--- /dev/null
+++ b/RELEASE_v0.14.0.md
@@ -0,0 +1,477 @@
+# Hermes Agent v0.14.0 (v2026.5.16)
+
+**Release Date:** May 16, 2026
+**Since v0.13.0:** 808 commits · 633 merged PRs · 1393 files changed · 165,061 insertions · 545 issues closed (12 P0, 50 P1) · 215 community contributors (including co-authors)
+
+> The Foundation Release — Hermes Agent installs and runs anywhere now. Native Windows ships in early beta with a full PowerShell installer story, a `pip install hermes-agent` wheel lands on PyPI, lazy-deps reshape what `pip install hermes-agent` actually pulls down, the supply-chain checker scans every install/upgrade for unsafe versions, and a new OpenAI-compatible local proxy lets Codex / Aider / Cline talk to OAuth-only providers (Claude Pro, ChatGPT Pro, SuperGrok). The cold-start wave shaves ~19 seconds off `hermes` launch, browser-tool CDP calls run 180x faster, and `hermes tools` All-Platforms drops from 14s to under 1.5s. Two new messaging platforms (LINE and SimpleX Chat) and a Microsoft Graph foundation (Teams pipeline + webhook adapter) land alongside `/handoff` that finally transfers sessions live, `vision_analyze` passing pixels through to vision-capable models, `x_search` as a first-class tool, LSP semantic diagnostics on every `write_file` / `patch`, a unified pluggable `video_generate`, a `computer_use` cua-driver backend, cross-session 1-hour Claude prompt caching, a per-turn file-mutation verifier, plus 9 new optional skills. 50+ P1 closures, 12 P0 closures.
+
+---
+
+## ✨ Highlights
+
+- **Native Windows support (early beta)** — full PowerShell installer, native subprocess/PTY paths, taskkill-based process management, MinGit auto-install, Microsoft Store python stub detection, foreground Ctrl+C preservation, taskkill+ps2 fallback, npm prefix handling, and ~40 follow-up Windows-only fixes across CLI / gateway / TUI / curator / tools. Hermes finally runs natively on `cmd.exe` and PowerShell, no WSL required. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561), [#22130](https://github.com/NousResearch/hermes-agent/pull/22130), [#22752](https://github.com/NousResearch/hermes-agent/pull/22752), [#26618](https://github.com/NousResearch/hermes-agent/pull/26618), and many more)
+
+- **`pip install hermes-agent && hermes`** — Hermes Agent is now a real PyPI package. One command, no clone, no git, no shell installer. Wheel includes the Ink TUI bundle and shell launcher. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
+
+- **Cold-start performance wave — ~19s off `hermes` launch** — skills cache, lazy Feishu import, no Nous HTTP at startup, plus PEP-562 lazy adapter imports (QQ, Yuanbao, Teams, Google Chat), deferred `fal_client` / `google-cloud` / `httpx` loads, models.dev disk-cache-first lookup, parallel doctor API checks, eager-skip plugin discovery on built-in subcommands, `hermes tools` All-Platforms drops from 14s to <1.5s, welcome banner skipped on `chat -q`. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
+
+- **180x faster `browser_console` evaluations** — routed through the supervisor's persistent CDP WebSocket instead of spawning a fresh DevTools session per call. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
+
+- **Supply-chain advisory checker + lazy-deps framework + tiered install fallback** — every `pip install` / `hermes update` scans dependencies against an advisory list, lazy-deps replace heavy import-time loads with first-use installs, and the installer falls back through extras tiers when a wheel rejects on the target platform. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
+
+- **OpenAI-compatible local proxy** — `hermes proxy` exposes any OAuth-authed provider (Claude Pro, ChatGPT Pro, SuperGrok) as an OpenAI-compatible endpoint that Codex / Aider / Cline / VS Code Continue can hit. Your subscription, your tools. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
+
+- **Cross-session 1-hour Claude prompt cache** — Anthropic / OpenRouter / Nous Portal now share a 1h prefix cache across sessions for Claude models. Fast resume, fast `/new`, lower cost on repeat work. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828))
+
+- **Two new messaging platforms — LINE + SimpleX Chat** — LINE Messaging API lands as a first-class platform, SimpleX Chat salvages #2558 onto the modern adapter spec. Hermes is now on 22 platforms. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
+
+- **Microsoft Graph foundation — Teams pipeline + webhook adapter** — `msgraph` auth/client foundation, webhook listener platform, Teams pipeline plugin runtime, and Teams outbound delivery via the existing adapter — Hermes can now read and post to Teams. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
+
+- **`/handoff` actually transfers the session live** — the agent's active session moves to a different model / persona / profile mid-conversation, with messages, tool history, and context preserved. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
+
+- **`x_search` — first-class X (Twitter) search tool** — gated tool with OAuth-or-API-key auth, no skill needed to query the timeline. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
+
+- **`vision_analyze` returns pixels to vision-capable models** — when the active model can see, `vision_analyze` now hands the image straight through instead of falling back to a text description. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
+
+- **LSP semantic diagnostics on every write** — `write_file` and `patch` now run real language-server diagnostics on the post-edit file (delta-only) and surface real errors before they ship downstream. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
+
+- **Per-turn file-mutation verifier footer** — after every turn that wrote files, the agent gets a verifier footer summarizing what actually changed on disk — catches silent overwrites and "wrote it but it didn't land" bugs. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
+
+- **Unified `video_generate` with pluggable provider backends** — single tool, any backend. Drop in a new video provider as a plugin, no core changes. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
+
+- **`computer_use` cua-driver backend** — proper focus-safe ops, non-Anthropic provider support, refresh on `hermes update`. Computer-use is no longer locked to a single SDK. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
+
+- **xAI Grok OAuth provider — SuperGrok via subscription** — sign in with your xAI account, talk to Grok models from Hermes. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534))
+
+- **Clarify with buttons — native inline keyboards on Telegram + Discord** — the `clarify` tool renders multi-choice prompts as platform-native buttons instead of typed responses. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
+
+- **Discord channel history backfill (default on)** — Hermes reads recent channel history when joining a thread so it actually knows what's been said. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
+
+- **Watchers skill — RSS / HTTP JSON / GitHub polling via cron `no_agent` mode** — skill recipes that wire change-detection sources directly into cron's script-only watchdog mode. ([#21881](https://github.com/NousResearch/hermes-agent/pull/21881))
+
+- **Zed ACP Registry integration + uvx distribution** — Hermes is in the Zed registry, installable via `uvx` (no npm). Plus `hermes acp --setup-browser` bootstraps browser tools for registry installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
+
+- **OpenRouter Pareto Code router** — wire a new OpenRouter router with `min_coding_score` knob. Pick the cheapest model that meets your quality bar. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
+
+- **Optional codex app-server runtime for OpenAI/Codex models** — drives the OpenAI Codex CLI under the hood for OpenAI/Codex paths, with session reuse, wedge retirement, and OAuth refresh classification. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
+
+- **`hermes-skills/huggingface` as a trusted default tap** — community skills index from huggingface.co/skills is available by default in the Skills Hub. ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
+
+- **9 new optional skills** — Hyperliquid (perp/spot trading via SDK + REST) (@kshitijk4poor & Hermes), Yahoo Finance market data, api-testing (REST/GraphQL debug), unified EVM multi-chain skill (folds #25291 + #2010 + base/), darwinian-evolver, osint-investigation (closes #355), pinggy-tunnel, watchers (RSS/HTTP/GitHub via cron), Notion overhaul for the Developer Platform (May 2026). ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
+
+- **API server exposes run approval events** — long-running runs surface approval requests over the API stream, no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))
+
+- **`/subgoal` — user-added criteria appended to active `/goal`** — layer extra success criteria onto a running goal loop. The judge sees them in the prompt, no behavior change when subgoals are empty. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
+
+- **Plugins can run any LLM call via `ctx.llm`** — plugins get a first-class hook to make their own LLM requests through the active provider/credentials, no manual wiring. Plus `tool_override` flag for replacing built-in tools. ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
+
+- **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — two new free search backends alongside Tavily / SearXNG / Exa. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
+
+- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS classification** — closes the `sudo -S` brute-force avenue; approval gates classify stdin-fed and askpass-stripped sudo invocations as dangerous. (salvages of #22194 + #21128) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
+
+- **Provider rename — Alibaba Cloud → Qwen Cloud, picker reorder** — matches what the world calls it. Existing config keys still work. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
+
+
+---
+
+## 🪟 Windows — Native Support (Early Beta)
+
+### Bootstrap & installer
+- **Native Windows support (early beta)** — first-class native Windows path across CLI / gateway / TUI / tools ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
+- **PyPI wheel packaging — `pip install hermes-agent && hermes`** (salvage of #26350) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
+- **Recognise Shift+Enter as a newline key** + Windows docs (salvage #21545) ([#22130](https://github.com/NousResearch/hermes-agent/pull/22130))
+- **Preserve Ctrl+C for Windows foreground runs** (@helix4u) ([#22752](https://github.com/NousResearch/hermes-agent/pull/22752))
+- **Stop spamming cwd-missing + tirith-spawn warnings on every terminal call** ([#26618](https://github.com/NousResearch/hermes-agent/pull/26618))
+- **Use `--extra all` not `--all-extras`; drop lazy-covered extras from `[all]`** ([#24515](https://github.com/NousResearch/hermes-agent/pull/24515))
+
+### Windows-specific fixes (40+ across cli / tools / gateway / curator / TUI)
+A long tail of native-Windows fixes shipped alongside the beta — taskkill-based subprocess management, MinGit auto-install, Microsoft Store python stub detection, npm prefix handling, native PTY paths, signal handling differences, foreground process management, ANSI sequence handling, path normalization, file-locking semantics, and many more. Full list in commit log under `fix(windows)` / `feat(windows)` / `windows`.
+
+---
+
+## 🚀 Performance Wave
+
+### Cold start
+- **Cut ~19s from `hermes` cold start** — skills cache + lazy Feishu + no Nous HTTP at startup ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138))
+- **Skip eager plugin discovery on known built-in subcommands** ([#22120](https://github.com/NousResearch/hermes-agent/pull/22120))
+- **Cache Nous auth + .env loads** — `hermes tools` All Platforms from 14s to <1.5s ([#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
+- **Skip welcome banner on `chat -q` single-query mode** ([#22904](https://github.com/NousResearch/hermes-agent/pull/22904))
+- **Defer heavy google-cloud imports in google_chat to first adapter use** ([#22681](https://github.com/NousResearch/hermes-agent/pull/22681))
+- **Defer QQAdapter and YuanbaoAdapter imports via PEP 562** ([#22790](https://github.com/NousResearch/hermes-agent/pull/22790))
+- **Defer httpx import in teams to first webhook call** ([#22831](https://github.com/NousResearch/hermes-agent/pull/22831))
+- **Defer fal_client import to first generation request** ([#22859](https://github.com/NousResearch/hermes-agent/pull/22859))
+- **models.dev cache-first lookup, skip network when disk cache is fresh** ([#22808](https://github.com/NousResearch/hermes-agent/pull/22808))
+- **Parallelize API connectivity checks in `hermes doctor` and disable IMDS** ([#22766](https://github.com/NousResearch/hermes-agent/pull/22766))
+
+### Runtime
+- **180x faster `browser_console` evaluations** — route through supervisor's persistent CDP WebSocket ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
+- **Tune Telegram cadence + adaptive fast-path for short replies** (salvage of #10388) ([#23587](https://github.com/NousResearch/hermes-agent/pull/23587))
+- **Accumulate length-continuation prefix via list+join** ([#26237](https://github.com/NousResearch/hermes-agent/pull/26237))
+
+### Prompt caching
+- **Cross-session 1h prefix cache for Claude on Anthropic / OpenRouter / Nous Portal** ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828))
+- **Hit prefix cache in background review fork** (salvage #17276 + #25427) ([#25434](https://github.com/NousResearch/hermes-agent/pull/25434))
+
+---
+
+## 📦 Installation & Distribution
+
+### PyPI + supply-chain
+- **PyPI wheel packaging — `pip install hermes-agent && hermes`** (salvage of #26350) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
+- **Supply-chain advisory checker + lazy-install framework + tiered install fallback** ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
+- **Use `--extra all` not `--all-extras`; drop lazy-covered extras from `[all]`** ([#24515](https://github.com/NousResearch/hermes-agent/pull/24515))
+- **Skip browser download when system chromium exists** (@helix4u) ([#25317](https://github.com/NousResearch/hermes-agent/pull/25317))
+
+### Nix
+- **`extraDependencyGroups` for sealed venv extras** (@alt-glitch) ([#21817](https://github.com/NousResearch/hermes-agent/pull/21817))
+- **Refresh npm lockfile hashes** — keeps Nix flake builds reproducible
+
+### Docker
+- **Bootstrap auth.json from env on first boot** ([#21880](https://github.com/NousResearch/hermes-agent/pull/21880))
+- **Drop manual @hermes/ink build, rely on esbuild bundle** — slimmer image
+
+### ACP / Zed
+- **Zed ACP Registry integration** (salvage of #25908) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079))
+- **Switch to uvx distribution, drop npm launcher** ([#26120](https://github.com/NousResearch/hermes-agent/pull/26120))
+- **`hermes acp --setup-browser` bootstraps browser tools for registry installs** ([#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Sessions & handoff
+- **`/handoff` actually transfers the session live** ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
+- **Expose `HERMES_SESSION_ID` env var to agent tools** (@alt-glitch) ([#23847](https://github.com/NousResearch/hermes-agent/pull/23847))
+
+### Goals (Ralph loop)
+- **`/subgoal` — user-added criteria appended to active `/goal`** ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
+- **`/goal` checklist + /subgoal user controls** ([#23456](https://github.com/NousResearch/hermes-agent/pull/23456)) — rolled back in window ([#23813](https://github.com/NousResearch/hermes-agent/pull/23813)); /subgoal returned in simpler form via #25449
+
+### Compression
+- **Make `protect_first_n` configurable** ([#25447](https://github.com/NousResearch/hermes-agent/pull/25447))
+
+### Verification
+- **Per-turn file-mutation verifier footer** ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
+
+### Stream retry
+- **Log inner cause, upstream headers, bytes/elapsed on every drop** ([#23005](https://github.com/NousResearch/hermes-agent/pull/23005))
+
+---
+
+## 🤖 Models & Providers
+
+### New providers
+- **xAI Grok OAuth (SuperGrok Subscription) provider** ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534))
+- **NovitaAI provider** (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
+- **NVIDIA NIM billing origin header** (salvage #25211) ([#26585](https://github.com/NousResearch/hermes-agent/pull/26585))
+
+### Provider work
+- **OpenRouter Pareto Code router with `min_coding_score` knob** ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
+- **Optional codex app-server runtime for OpenAI/Codex models** ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182))
+- **Codex-runtime: retire wedged sessions + post-tool watchdog + OAuth refresh classify** ([#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
+- **Codex-runtime: skip unavailable plugins during migration** ([#25437](https://github.com/NousResearch/hermes-agent/pull/25437))
+- **Codex-runtime: de-dup `[plugins.X]` tables and stop leaking HERMES_HOME into config.toml** (#26250) (@kshitijk4poor) ([#26260](https://github.com/NousResearch/hermes-agent/pull/26260))
+- **Pass `reasoning.effort` to xAI Responses API** ([#22807](https://github.com/NousResearch/hermes-agent/pull/22807))
+- **Custom provider: prompt and persist explicit `api_mode`** ([#25068](https://github.com/NousResearch/hermes-agent/pull/25068))
+- **Rename Alibaba Cloud → Qwen Cloud, reorder picker** ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
+- **Restore gpt-5.3-codex-spark for ChatGPT Pro** (salvage #18286 + #19530, fixes #16172) (@kshitijk4poor) ([#22991](https://github.com/NousResearch/hermes-agent/pull/22991))
+- **Inject tool-use enforcement for GLM models** ([#24715](https://github.com/NousResearch/hermes-agent/pull/24715))
+- **Use Nous Portal as model metadata authority** (@rob-maron) ([#24502](https://github.com/NousResearch/hermes-agent/pull/24502))
+- **Unified `client=hermes-client-v<version>` tag on every Portal request** ([#24779](https://github.com/NousResearch/hermes-agent/pull/24779))
+- **Prevent stale Ollama credentials after provider switch** (@kshitijk4poor) ([#21703](https://github.com/NousResearch/hermes-agent/pull/21703))
+- **Auxiliary client: rotate pooled auth after quota failures** (salvage #22779) ([#22792](https://github.com/NousResearch/hermes-agent/pull/22792))
+- **Auxiliary client: skip providers without credentials immediately** (#25395) ([#25487](https://github.com/NousResearch/hermes-agent/pull/25487))
+- **Auth: send Nous refresh token via header** (@shannonsands) ([#21578](https://github.com/NousResearch/hermes-agent/pull/21578))
+- **MiniMax: harden OAuth dashboard and runtime** ([#24165](https://github.com/NousResearch/hermes-agent/pull/24165))
+
+### OpenAI-compatible proxy
+- **Local OpenAI-compatible proxy for OAuth providers** — Codex / Aider / Cline can hit Claude Pro, ChatGPT Pro, SuperGrok ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New platforms
+- **LINE Messaging API platform plugin** ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197))
+- **SimpleX Chat platform plugin** (salvages #2558) ([#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
+
+### Microsoft Graph foundation
+- **msgraph: add auth and client foundation** (salvage of #21408) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922))
+- **msgraph: add webhook listener platform** (salvage of #21409) ([#21969](https://github.com/NousResearch/hermes-agent/pull/21969))
+- **teams-pipeline: add plugin runtime and operator cli** (salvage of #21410) ([#22007](https://github.com/NousResearch/hermes-agent/pull/22007))
+- **teams: add pipeline outbound delivery via existing adapter** (salvage of #21411) ([#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
+
+### Cross-platform
+- **Per-platform admin/user split for slash commands** (salvage of #4443) ([#23373](https://github.com/NousResearch/hermes-agent/pull/23373))
+- **Forensics on signal handling — non-blocking diag, per-phase timing, stale-unit warning** ([#23285](https://github.com/NousResearch/hermes-agent/pull/23285))
+- **Keep gateway running when platforms fail; add per-platform circuit breaker + `/platform`** ([#26600](https://github.com/NousResearch/hermes-agent/pull/26600))
+- **Wire `clarify` tool with inline keyboard buttons on Telegram** ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199))
+- **Add `chat_id` to `hook_ctx` for message source tracking** ([#24710](https://github.com/NousResearch/hermes-agent/pull/24710))
+
+### Telegram
+- **Native draft streaming via `sendMessageDraft` (Bot API 9.5+)** (salvage of #3412) ([#23512](https://github.com/NousResearch/hermes-agent/pull/23512))
+- **Stream Telegram edits safely** — salvage of #22264 (@kshitijk4poor) ([#22518](https://github.com/NousResearch/hermes-agent/pull/22518))
+- **Telegram notification mode** (salvage #22772) ([#22793](https://github.com/NousResearch/hermes-agent/pull/22793))
+- **Telegram guest mention mode** (@kshitijk4poor) ([#22759](https://github.com/NousResearch/hermes-agent/pull/22759))
+- **Split-and-deliver oversized edits instead of silent truncation** (salvage of #19537) ([#23576](https://github.com/NousResearch/hermes-agent/pull/23576))
+- **Preserve DM topic routing via reply fallback** (salvage #22053) (@kshitijk4poor) ([#22410](https://github.com/NousResearch/hermes-agent/pull/22410))
+- **Pass `source.thread_id` explicitly on auto-reset notice** (carve-out of #7404) ([#23440](https://github.com/NousResearch/hermes-agent/pull/23440))
+
+### Discord
+- **Render clarify choices as buttons** ([#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
+- **Channel history backfill — default on, broadened scope** ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
+- **`thread_require_mention` for multi-bot threads** (salvage #25313) ([#25445](https://github.com/NousResearch/hermes-agent/pull/25445))
+
+### Slack
+- **Support `!cmd` as alternate prefix for slash commands in threads** ([#25355](https://github.com/NousResearch/hermes-agent/pull/25355))
+
+### WhatsApp
+- **Surface quoted reply metadata from Baileys** (#25398) ([#25489](https://github.com/NousResearch/hermes-agent/pull/25489))
+
+### Feishu / Google Chat / others
+- **Feishu: native update prompt cards** (@kshitijk4poor) ([#22448](https://github.com/NousResearch/hermes-agent/pull/22448))
+- **Google Chat: repair setup prompt imports** (@helix4u) ([#22038](https://github.com/NousResearch/hermes-agent/pull/22038))
+- **Google Chat: honor relay-declared sender_type** (salvage of #22107) (@kshitijk4poor) ([#22432](https://github.com/NousResearch/hermes-agent/pull/22432))
+- **LINE: use `build_source` instead of nonexistent `create_source`** ([#24717](https://github.com/NousResearch/hermes-agent/pull/24717))
+- **Add `weixin, and more` to gateway docs** (salvage of #21063 by @wuwuzhijing)
+
+---
+
+## 🖥️ CLI & TUI
+
+### CLI
+- **Show YOLO mode warning in banner and status bar** ([#26238](https://github.com/NousResearch/hermes-agent/pull/26238))
+- **Confirm prompt for destructive slash commands** (#4069) ([#22687](https://github.com/NousResearch/hermes-agent/pull/22687))
+- **`docker_extra_args` + `display.timestamps`** ([#23599](https://github.com/NousResearch/hermes-agent/pull/23599))
+- **Delegate tool: show user's actual concurrency / spawn-depth limits in description** ([#22694](https://github.com/NousResearch/hermes-agent/pull/22694))
+
+### TUI
+- **`/sessions` slash command for browsing and resuming previous sessions** (@austinpickett) ([#20805](https://github.com/NousResearch/hermes-agent/pull/20805))
+- **Segment turns with rule above non-first user msgs; trim ticker dead space** (@OutThisLife) ([#21846](https://github.com/NousResearch/hermes-agent/pull/21846))
+- **Support attaching to an existing gateway** (@OutThisLife) ([#21978](https://github.com/NousResearch/hermes-agent/pull/21978))
+- **Resolve markdown links to readable page titles** (@OutThisLife) ([#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
+- **Width-aware markdown table rendering with vertical fallback** (@alt-glitch) ([#26195](https://github.com/NousResearch/hermes-agent/pull/26195))
+- **Keep Ink displayCursor in sync with fast-echo writes so cursor stops drifting** (@OutThisLife) ([#26717](https://github.com/NousResearch/hermes-agent/pull/26717))
+- **Allow transcript scroll + Esc during approval/clarify/confirm prompts** (@OutThisLife) ([#26414](https://github.com/NousResearch/hermes-agent/pull/26414))
+- **Preserve session when switching personality** (@austinpickett) ([#20942](https://github.com/NousResearch/hermes-agent/pull/20942))
+- **Skip native safety net on OSC52-capable terminals** (@benbarclay) ([#20954](https://github.com/NousResearch/hermes-agent/pull/20954))
+
+### Dashboard / GUI
+- **Route embedded TUI through dashboard gateway** (@OutThisLife) ([#21979](https://github.com/NousResearch/hermes-agent/pull/21979))
+- **Hide token/cost analytics behind config flag (default off)** ([#25438](https://github.com/NousResearch/hermes-agent/pull/25438))
+- **Fix Langfuse observability — trace I/O, tool outputs, placeholder credentials** (closes #22342, #22763) (@kshitijk4poor) ([#26320](https://github.com/NousResearch/hermes-agent/pull/26320))
+- **MiniMax 'Login' button launched Claude OAuth** (salvage #22849) ([#24058](https://github.com/NousResearch/hermes-agent/pull/24058))
+- **Update cron modals** (@austinpickett) ([#25985](https://github.com/NousResearch/hermes-agent/pull/25985))
+- **Analytics: prevent silent token loss and add Claude 4.5–4.7 pricing** (@austinpickett) ([#21455](https://github.com/NousResearch/hermes-agent/pull/21455))
+
+---
+
+## 🔧 Tools & Capabilities
+
+### Vision & video
+- **`vision_analyze` returns pixels to vision-capable models** ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
+- **Unified `video_generate` with pluggable provider backends** ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
+- **`image_gen`: actionable setup message when no FAL backend is reachable** ([#26222](https://github.com/NousResearch/hermes-agent/pull/26222))
+
+### Computer use
+- **`computer_use` cua-driver backend + focus-safe ops + non-Anthropic provider fix** (re-salvage #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967))
+- **Refresh cua-driver on `hermes update` + add `install --upgrade`** ([#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
+
+### LSP & write-time diagnostics
+- **Semantic diagnostics from real language servers in `write_file`/`patch`** ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168))
+- **Shift baseline diagnostics into post-edit coordinates** ([#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
+
+### Search & web
+- **Brave Search (free tier) and DDGS search providers** ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
+- **Bearer auth header for Tavily `/crawl` endpoint** ([#24658](https://github.com/NousResearch/hermes-agent/pull/24658))
+
+### X (Twitter)
+- **Gated `x_search` tool with OAuth-or-API-key auth** ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
+
+### Browser
+- **Route `browser_console` eval through supervisor's persistent CDP WS (180x faster)** ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
+- **Support externally managed Camofox sessions** ([#24499](https://github.com/NousResearch/hermes-agent/pull/24499))
+
+### MCP
+- **`supports_parallel_tool_calls` for MCP servers** (salvage of #9944) ([#26825](https://github.com/NousResearch/hermes-agent/pull/26825))
+- **Codex preset for Codex CLI MCP server** (salvage #22663) ([#22679](https://github.com/NousResearch/hermes-agent/pull/22679))
+- **Stop retrying initial MCP auth failures** (#25624) ([#25776](https://github.com/NousResearch/hermes-agent/pull/25776))
+
+### Google Workspace
+- **Drive write ops + Docs/Sheets create/append** ([#21895](https://github.com/NousResearch/hermes-agent/pull/21895))
+
+### Per-turn verifier
+- **Per-turn file-mutation verifier footer** ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
+
+---
+
+## 🧩 Kanban (Multi-Agent)
+
+- **`specify` — auxiliary LLM fleshes out triage tasks** ([#21435](https://github.com/NousResearch/hermes-agent/pull/21435))
+- **Orchestrator board tools — `kanban_list` + `kanban_unblock`** (carve-out of #20568) ([#23012](https://github.com/NousResearch/hermes-agent/pull/23012))
+- **`stranded_in_ready` diagnostic for unclaimed tasks** ([#23578](https://github.com/NousResearch/hermes-agent/pull/23578))
+- **Dashboard batch QOL upgrade** (salvage of #23240) ([#23550](https://github.com/NousResearch/hermes-agent/pull/23550))
+- **Tooltips and docs link across dashboard** ([#21541](https://github.com/NousResearch/hermes-agent/pull/21541))
+- **Dedupe notifier delivery via atomic claim + rewind on failure** (salvage #22558) ([#23401](https://github.com/NousResearch/hermes-agent/pull/23401))
+- **Keep notifier subscriptions alive across retry cycles** (salvage #21398) ([#23423](https://github.com/NousResearch/hermes-agent/pull/23423))
+- **Drop caller-controlled author override in `kanban_comment`** (salvage of #22109) (@kshitijk4poor) ([#22435](https://github.com/NousResearch/hermes-agent/pull/22435))
+- **Sanitize comment author rendering in `build_worker_context`** ([#22769](https://github.com/NousResearch/hermes-agent/pull/22769))
+
+---
+
+## 🧠 Plugins & Extension
+
+### Plugin surface
+- **Run any LLM call from inside a plugin via `ctx.llm`** ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194))
+- **`tool_override` flag for replacing built-in tools** (closes #11049) ([#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
+- **`standalone_sender_fn` for out-of-process cron delivery** (@kshitijk4poor) ([#22461](https://github.com/NousResearch/hermes-agent/pull/22461))
+- **`HERMES_PLUGINS_DEBUG=1` surfaces plugin discovery logs** ([#22684](https://github.com/NousResearch/hermes-agent/pull/22684))
+- **Hindsight-client as optional dependency** (@alt-glitch) ([#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
+
+### Profile & distribution
+- **Shareable profile distributions via git** ([#20831](https://github.com/NousResearch/hermes-agent/pull/20831))
+
+---
+
+## ⏰ Cron
+
+- **Routing intent — `deliver=all` fans out to every connected channel** ([#21495](https://github.com/NousResearch/hermes-agent/pull/21495))
+- **Support name-based lookup for job operations** ([#26231](https://github.com/NousResearch/hermes-agent/pull/26231))
+- **Blank Cron dashboard tab + partial-record crashes** (salvage #21042 + #22330) (@kshitijk4poor) ([#22389](https://github.com/NousResearch/hermes-agent/pull/22389))
+- **Do not seed `HERMES_SESSION_*` contextvars from cron origin** (salvage of #22356) (@kshitijk4poor) ([#22382](https://github.com/NousResearch/hermes-agent/pull/22382))
+- **Scan assembled prompt including skill content for prompt injection** (#3968)
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skills Hub
+- **`hermes-skills/huggingface` as a trusted default tap** (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
+- **Show per-skill pages in the left sidebar** ([#26646](https://github.com/NousResearch/hermes-agent/pull/26646))
+- **Richer info panels on the Skills Hub** ([#22905](https://github.com/NousResearch/hermes-agent/pull/22905))
+- **Refuse `skill_view` name collisions instead of guessing** (closes #6136 @polkn)
+
+### Curator
+- **Show rename map in user-visible summary** ([#22910](https://github.com/NousResearch/hermes-agent/pull/22910))
+- **Hint at `hermes curator pin` in the rename block** ([#23212](https://github.com/NousResearch/hermes-agent/pull/23212))
+
+### New optional skills
+- **Hyperliquid** — perp/spot trading via SDK + REST (salvage of #1952) ([#23583](https://github.com/NousResearch/hermes-agent/pull/23583))
+- **Yahoo Finance** market data ([#23590](https://github.com/NousResearch/hermes-agent/pull/23590))
+- **api-testing** (REST/GraphQL debug, salvages #1800) ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582))
+- **Unified EVM multi-chain skill** (salvages #25291 + #2010 + folds in base/) ([#25299](https://github.com/NousResearch/hermes-agent/pull/25299))
+- **darwinian-evolver** ([#26760](https://github.com/NousResearch/hermes-agent/pull/26760))
+- **osint-investigation** (closes #355) ([#26729](https://github.com/NousResearch/hermes-agent/pull/26729))
+- **pinggy-tunnel** ([#26765](https://github.com/NousResearch/hermes-agent/pull/26765))
+- **watchers** — RSS / HTTP JSON / GitHub polling via cron no-agent ([#21881](https://github.com/NousResearch/hermes-agent/pull/21881))
+- **Notion overhaul for the Developer Platform** (May 2026) ([#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
+
+---
+
+## 🔒 Security & Reliability
+
+### Security hardening
+- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS** (salvage of #22194 + #21128) (@kshitijk4poor) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
+- **Drop caller-controlled author override in `kanban_comment`** (salvage of #22109) (@kshitijk4poor) ([#22435](https://github.com/NousResearch/hermes-agent/pull/22435))
+- **Cover remaining SSRF fetch paths in skills-hub** (salvage #22804) ([#22843](https://github.com/NousResearch/hermes-agent/pull/22843))
+- **Use credential_pool for custom endpoint model listing probes** (salvage #22810) ([#22842](https://github.com/NousResearch/hermes-agent/pull/22842))
+- **Require dashboard auth for plugin API routes** (salvage #19541) ([#23220](https://github.com/NousResearch/hermes-agent/pull/23220))
+- **Sanitize env and redact output in quick commands + remove write-only `_pending_messages`** ([#23584](https://github.com/NousResearch/hermes-agent/pull/23584))
+- **Reduce unnecessary `shell=True` in subprocess calls** ([#25149](https://github.com/NousResearch/hermes-agent/pull/25149))
+- **Sanitize Google Chat sender_type from relay** (salvage of #22107) (@kshitijk4poor) ([#22432](https://github.com/NousResearch/hermes-agent/pull/22432))
+- **Supply-chain advisory checker** ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
+- **Rewrite security policy around OS-level isolation as the boundary** (@jquesnelle) ([#20317](https://github.com/NousResearch/hermes-agent/pull/20317))
+- **Remove public security advisory page** ([#24253](https://github.com/NousResearch/hermes-agent/pull/24253))
+
+### Reliability — notable bug closures
+- **SQLite: fall back to `journal_mode=DELETE` on NFS/SMB/FUSE** (fixes `/resume` on network mounts) (@kshitijk4poor) ([#22043](https://github.com/NousResearch/hermes-agent/pull/22043))
+- **Codex-runtime: retire wedged sessions + post-tool watchdog + OAuth refresh classify** ([#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
+- **Codex-runtime: de-dup `[plugins.X]` tables and stop leaking HERMES_HOME** (#26250) (@kshitijk4poor) ([#26260](https://github.com/NousResearch/hermes-agent/pull/26260))
+- **Daytona: migrate legacy-sandbox lookup to cursor-based `list()`** ([#24587](https://github.com/NousResearch/hermes-agent/pull/24587))
+- **MCP: stop retrying initial MCP auth failures** (#25624) ([#25776](https://github.com/NousResearch/hermes-agent/pull/25776))
+- **Gateway: enable text-intercept for multi-choice clarify fallback** (#25587) ([#25778](https://github.com/NousResearch/hermes-agent/pull/25778))
+- **Gateway: keep running when platforms fail; per-platform circuit breaker + `/platform`** ([#26600](https://github.com/NousResearch/hermes-agent/pull/26600))
+- **Delegate: salvage #21933 JSON-string batch + diagnostic logging** (@kshitijk4poor) ([#22436](https://github.com/NousResearch/hermes-agent/pull/22436))
+- **Profiles+banner: exclude infrastructure from `--clone-all` + fix stale update-check repo resolution** (@kshitijk4poor) ([#22475](https://github.com/NousResearch/hermes-agent/pull/22475))
+- **ACP: inline file attachment resources** (salvage #21400 + image support) ([#21407](https://github.com/NousResearch/hermes-agent/pull/21407))
+- **CI: unblock shared PR checks** (@stephenschoettler) ([#21012](https://github.com/NousResearch/hermes-agent/pull/21012), [#25957](https://github.com/NousResearch/hermes-agent/pull/25957))
+
+### Notable reverts in window
+- **`/goal` checklist + /subgoal feature stack** — rolled back ([#23813](https://github.com/NousResearch/hermes-agent/pull/23813)); `/subgoal` returned in simpler form via [#25449](https://github.com/NousResearch/hermes-agent/pull/25449)
+- **Scrollback box width clamp** (#25975) rolled back to restore full-width borders ([#26163](https://github.com/NousResearch/hermes-agent/pull/26163))
+- **`fix(cli): tolerate unreadable dirs when building systemd PATH`** rolled back
+
+---
+
+## 🌍 i18n
+
+- **Localize all gateway commands + web dashboard, add 8 new locales (16 total)** ([#22914](https://github.com/NousResearch/hermes-agent/pull/22914))
+
+---
+
+## 📚 Documentation
+
+- **Repair Voice & TTS provider table** (@nightcityblade, fixes #24101) ([#24138](https://github.com/NousResearch/hermes-agent/pull/24138))
+- **Show per-skill pages in the left sidebar** ([#26646](https://github.com/NousResearch/hermes-agent/pull/26646))
+- **Mention Weixin in gateway help and docstrings** (salvage of #21063 by @wuwuzhijing)
+- **Richer info panels on the Skills Hub** ([#22905](https://github.com/NousResearch/hermes-agent/pull/22905))
+- Many more doc updates across providers, platforms, skills, Windows install paths, and dashboard.
+
+---
+
+## 🧪 Testing & CI
+
+- **Unblock shared PR checks** (@stephenschoettler) ([#21012](https://github.com/NousResearch/hermes-agent/pull/21012))
+- **Stabilize shared test state after 21012** (@stephenschoettler) ([#25957](https://github.com/NousResearch/hermes-agent/pull/25957))
+- A long tail of test additions for platforms, providers, plugins, and edge cases — 8 explicit `test:` PRs plus ~250 fix PRs that also added regression coverage.
+
+---
+
+## 👥 Contributors
+
+### Core
+- @teknium1 — release lead, architecture, ~406 PRs merged in window
+
+### Top community contributors
+- **@kshitijk4poor** — 38 PRs · Telegram cadence/streaming/topic routing, security hardening (sudo, SSRF, kanban_comment, dashboard auth), codex-runtime hygiene, NovitaAI provider, profile/banner fixes, Feishu update cards, gateway QOL across the board
+- **@alt-glitch** — 13 PRs · Markdown-table TUI rendering, `HERMES_SESSION_ID` env var, hindsight-client optional dep, Nix `extraDependencyGroups`
+- **@OutThisLife** (Brooklyn Nicholson) — 12 PRs · TUI turn segmentation, attach-to-gateway, markdown link titles, embedded TUI via dashboard gateway, Ink cursor sync, scroll/Esc during prompts
+- **@austinpickett** — 8 PRs · `/sessions` slash command, personality switching preserves session, cron modals, dashboard analytics
+- **@helix4u** — 5 PRs · Google Chat setup, browser install skip on system chromium, Windows Ctrl+C preservation
+- **@rob-maron** — 4 PRs · Nous Portal as model metadata authority, provider polish
+- **@stephenschoettler** — 3 PRs · CI stabilization
+- **@ethernet8023** — 3 PRs · platform/gateway work
+
+### All contributors (alphabetical)
+
+@02356abc, @0xbyt4, @0xharryriddle, @1000Delta, @1RB, @29206394, @A-kamal, @aashizpoudel, @Abd0r,
+@adybag14-cyber, @AgentArcLab, @ahmedbadr3, @AhmetArif0, @alblez, @Alex-yang00, @ALIYILD, @AllynSheep,
+@alt-glitch, @am423, @amathxbt, @amethystani, @ArecaNon, @Arkmusn, @askclaw-vesper, @AsoTora, @austinpickett,
+@aydnOktay, @ayushere, @baocin, @Bartok9, @benbarclay, @BennetYrWang, @Bihruze, @binhnt92, @briandevans,
+@brooklynnicholson, @btorresgil, @buntingszn, @CalmProton, @chrisworksai, @CoinTheHat, @dandacompany, @Dangooy,
+@DanielLSM, @David-0x221Eight, @ddupont808, @dhruv-saxena, @diablozzc, @dlkakbs, @dmahan93, @dmnkhorvath,
+@domtriola, @donrhmexe, @Dusk1e, @eloklam, @emozilla, @ephron-ren, @erenkarakus, @EthanGuo-coder,
+@ethernet8023, @evgyur, @explainanalyze, @fahdad, @fr33d3m0n, @Freeman-Consulting, @freqyfreqy, @Frowtek,
+@fu576, @github-actions[bot], @gnanirahulnutakki, @GodsBoy, @guglielmofonda, @Gutslabs, @hanzckernel,
+@heathley, @hekaru-agent, @helix4u, @HenkDz, @HiddenPuppy, @hllqkb, @hrygo, @HuangYuChuh, @Hugo-SEQUIER, @HxT9,
+@iacker, @InB4DevOps, @isaachuangGMICLOUD, @iuyup, @Jaaneek, @jackey8616, @jackjin1997, @Jaggia, @jak983464779,
+@jelrod27, @jethac, @JithendraNara, @johnisag, @Julientalbot, @Jwd-gity, @kallidean, @keyuyuan, @kfa-ai,
+@kidonng, @KiraKatana, @kjames2001, @konsisumer, @Korkyzer, @kshitijk4poor, @KvnGz, @lars-hagen, @leehack,
+@leepoweii, @LeonSGP43, @li0near, @libo1106, @liquidchen, @littlewwwhite, @liuhao1024, @liyoungc, @luandiasrj,
+@luoyuctl, @luyao618, @magic524, @mbac, @McClean, @memosr, @Mibayy, @ming1523, @mizgyo, @mrshu, @ms-alan,
+@MustafaKara7, @nederev, @nicoechaniz, @nidhi-singh02, @nightcityblade, @nik1t7n, @Ninso112, @NivOO5,
+@novax635, @nv-kasikritc, @oferlaor, @oswaldb22, @outdoorsea, @oxngon, @PaTTeeL, @pearjelly, @pefontana,
+@perng, @PhilipAD, @phuongvm, @polkn, @Prasanna28Devadiga, @princepal9120, @pty819, @purzbeats, @Quarkex,
+@quocanh261997, @qWaitCrypto, @Qwinty, @rahimsais, @raymaylee, @ReqX, @rewbs, @RhombusMaximus, @rob-maron,
+@Ruzzgar, @ryptotalent, @Sanjays2402, @shannonsands, @shaun0927, @SiliconID, @silv-mt-holdings, @simpolism,
+@smwbev, @soichiyo, @sprmn24, @steezkelly, @stephenschoettler, @Sylw3ster, @szymonclawd, @teyrebaz33,
+@Tianyu199509, @Tranquil-Flow, @TreyDong, @TurgutKural, @tw2818, @tymrtn, @uzunkuyruk, @v1b3coder,
+@vanthinh6886, @VinceZcrikl, @vKongv, @vominh1919, @voteblake, @VTRiot, @wali-reheman, @wesleysimplicio,
+@wilsen0, @WorldWriter, @worlldz, @wuli666, @wuwuzhijing, @Wysie, @XiaoXiao0221, @xieNniu, @xxxigm, @yehuosi,
+@ygd58, @yifengingit, @yuga-hashimoto, @zccyman, @ZeterMordio, @Zhekinmaksim, @zhengyn0001
+
+Also: @Nagatha (Claude Opus 4.7).
+
+---
+
+**Full Changelog**: [v2026.5.7...v2026.5.16](https://github.com/NousResearch/hermes-agent/compare/v2026.5.7...v2026.5.16)
diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py
index 0f247ddcc1f..9781c8bc689 100644
--- a/hermes_cli/__init__.py
+++ b/hermes_cli/__init__.py
@@ -14,8 +14,8 @@ Provides subcommands for:
 import os
 import sys
 
-__version__ = "0.13.0"
-__release_date__ = "2026.5.7"
+__version__ = "0.14.0"
+__release_date__ = "2026.5.16"
 
 
 def _ensure_utf8():
diff --git a/pyproject.toml b/pyproject.toml
index c1591a6a9e0..ba66d0da719 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hermes-agent"
-version = "0.13.0"
+version = "0.14.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -48,10 +48,6 @@ dependencies = [
   "croniter==6.0.0",
   # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
   "PyJWT[crypto]==2.12.1",  # CVE-2026-32597
-  # Directly imported by WeCom/Weixin crypto paths and pulled transitively by
-  # PyJWT[crypto]; pin explicitly so the floor doesn't drift below the
-  # CVE-2026-39892 fix (buffer overflow on non-contiguous buffers).
-  "cryptography==46.0.7",  # CVE-2026-39892
   # Windows has no IANA tzdata shipped with the OS, so Python's ``zoneinfo``
   # (PEP 615) raises ``ZoneInfoNotFoundError`` for every non-UTC timezone
   # out of the box.  ``tzdata`` ships the Olson database as a data package
@@ -69,7 +65,7 @@ dependencies = [
 [project.optional-dependencies]
 # Native Anthropic provider — only needed when provider=anthropic (not via
 # OpenRouter or other aggregators).
-anthropic = ["anthropic==0.87.0"]  # CVE-2026-34450, CVE-2026-34452
+anthropic = ["anthropic==0.86.0"]
 # Web search backends — each only loaded when the user picks it as their
 # search provider (configured via `hermes tools` or config.yaml).
 exa = ["exa-py==2.10.2"]
@@ -85,9 +81,9 @@ daytona = ["daytona==0.155.0"]
 vercel = ["vercel==0.5.7"]
 hindsight = ["hindsight-client==0.6.1"]
 dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
-messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.4", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]  # aiohttp: CVE-2026-34513/34518/34519/34520/34525
+messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
 cron = []  # croniter is now a core dependency; this extra kept for back-compat
-slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.4"]
+slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
 matrix = ["mautrix[encryption]==0.21.0", "Markdown==3.10.2", "aiosqlite==0.22.1", "asyncpg==0.31.0", "aiohttp-socks==0.11.0"]
 cli = ["simple-term-menu==1.6.6"]
 tts-premium = ["elevenlabs==1.59.0"]
@@ -104,8 +100,8 @@ pty = [
 ]
 honcho = ["honcho-ai==2.0.1"]
 mcp = ["mcp==1.26.0"]
-homeassistant = ["aiohttp==3.13.4"]
-sms = ["aiohttp==3.13.4"]
+homeassistant = ["aiohttp==3.13.3"]
+sms = ["aiohttp==3.13.3"]
 # Computer use — macOS background desktop control via cua-driver (MCP stdio).
 # The cua-driver binary itself is installed via `hermes tools` post-setup
 # (curl install script); this extra just pins the MCP client used to talk
@@ -214,12 +210,11 @@ hermes-acp = "acp_adapter.entry:main"
 py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
 
 [tool.setuptools.package-data]
-hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh"]
+hermes_cli = ["web_dist/**/*"]
 gateway = ["assets/**/*"]
-acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"]
 
 [tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "acp_adapter.*", "plugins", "plugins.*", "providers", "providers.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/scripts/release.py b/scripts/release.py
index 18d5a46123a..5d4cb3eb82f 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1081,6 +1081,11 @@ AUTHOR_MAP = {
     "nidhi2894@gmail.com": "nidhi-singh02",  # PR #2752 salvage (slack whitespace-only IndexError guard)
     "38173192+nidhi-singh02@users.noreply.github.com": "nidhi-singh02",
     "Jaaneek@users.noreply.github.com": "Jaaneek",  # PR #26457 (xAI Grok OAuth provider)
+    # v0.14.0 additions
+    "chuang.guo@hopechart.com": "wuwuzhijing",  # PR #21063 salvage (gateway docs mention Weixin)
+    "nightcityblade@gmail.com": "nightcityblade",  # PR #24138 (docs voice/tts table)
+    "pol.kuijken@gmail.com": "polkn",  # PR #6136 salvage (skill_view collision refusal)
+    "robin@soal.org": "rewbs",
 }
 
 
From f3a4af9cf2a626cb3e055766cb1cff60168d295d Mon Sep 17 00:00:00 2001
From: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sat, 16 May 2026 06:45:29 -0700
Subject: [PATCH 132/218] fix(acp): replay assistant reasoning as
 agent_thought_chunk on session/load (#12285) (#26943)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Persisted assistant `reasoning_content` / `reasoning` fields are now emitted
as ACP `agent_thought_chunk` notifications during `_replay_session_history`,
so editor clients (Zed, etc.) rebuild collapsed Thinking panes when the user
re-opens a session that used a thinking model.

Ordering matches live streaming: thought precedes message text within the
same assistant turn, mirroring how `reasoning_callback` deltas arrive before
`stream_delta_callback` deltas in `events.py::make_thinking_cb` /
`make_message_cb`.

Behavior on non-reasoning histories is unchanged; the replay loop's existing
text / tool_call / tool_call_update / plan emission is preserved bit-for-bit.

Closes #12285.

Credit:
- @Yukipukii1 (#14691) — original thought-replay design via
  `acp.update_agent_thought_text`; the tool-call portion of that PR has
  since landed via #19139, but the reasoning replay is theirs.
- @HenkDz (#17652 / #18578) — established the `_replay_session_history` and
  `_history_*` helper conventions this builds on.
- @D1zzyDwarf (#16531) — also closed by this work.
---
 acp_adapter/server.py    |  93 +++++++++++++-----
 tests/acp/test_server.py | 206 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 277 insertions(+), 22 deletions(-)

diff --git a/acp_adapter/server.py b/acp_adapter/server.py
index 71fce1890d1..9c8bb416999 100644
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -18,6 +18,7 @@ import acp
 from acp.schema import (
     AgentCapabilities,
     AgentMessageChunk,
+    AgentThoughtChunk,
     AuthenticateResponse,
     AvailableCommand,
     AvailableCommandsUpdate,
@@ -788,14 +789,20 @@ class HermesACPAgent(acp.Agent):
     # ---- Session management -------------------------------------------------
 
     @staticmethod
-    def _history_message_text(message: dict[str, Any]) -> str:
-        """Extract displayable text from a persisted OpenAI-style message."""
-        content = message.get("content")
-        if isinstance(content, str):
-            return content.strip()
-        if isinstance(content, list):
+    def _flatten_history_text(value: Any) -> str:
+        """Normalize a persisted text-or-text-parts value into a single string.
+
+        OpenAI-style assistant content (and provider reasoning fields) can arrive
+        as either a scalar string or a list of ``{"text": ...}`` /
+        ``{"type": "text", "content": ...}`` parts. Whitespace-only inputs
+        collapse to an empty string so callers can treat ``""`` as "nothing to
+        emit".
+        """
+        if isinstance(value, str):
+            return value.strip()
+        if isinstance(value, list):
             parts: list[str] = []
-            for item in content:
+            for item in value:
                 if isinstance(item, dict):
                     text = item.get("text")
                     if isinstance(text, str):
@@ -807,6 +814,29 @@ class HermesACPAgent(acp.Agent):
             return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
         return ""
 
+    @classmethod
+    def _history_message_text(cls, message: dict[str, Any]) -> str:
+        """Extract displayable text from a persisted OpenAI-style message."""
+        return cls._flatten_history_text(message.get("content"))
+
+    @classmethod
+    def _history_reasoning_text(cls, message: dict[str, Any]) -> str:
+        """Extract displayable reasoning/thought text from a persisted assistant message.
+
+        Returns the first non-empty value among ``reasoning_content`` (the
+        canonical field used by DeepSeek / Moonshot and the post-#16892
+        chat-completions normalizer) and ``reasoning`` (used by the codex
+        event projector and several other transports). Both keys are
+        actively written by live code paths, so neither branch is
+        deprecated — they cover different transports rather than old vs.
+        new sessions.
+        """
+        for key in ("reasoning_content", "reasoning"):
+            text = cls._flatten_history_text(message.get(key))
+            if text:
+                return text
+        return ""
+
     @staticmethod
     def _history_message_update(
         *,
@@ -827,6 +857,11 @@ class HermesACPAgent(acp.Agent):
             )
         return None
 
+    @staticmethod
+    def _history_thought_update(text: str) -> AgentThoughtChunk:
+        """Build an ACP history replay update for an assistant thought."""
+        return acp.update_agent_thought_text(text)
+
     @staticmethod
     def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
         """Extract function name/arguments from an OpenAI-style tool_call."""
@@ -858,9 +893,10 @@ class HermesACPAgent(acp.Agent):
 
         Zed's ACP history UI calls ``session/load`` after the user picks an item
         from the Agents sidebar. The agent must then replay the full conversation
-        as user/assistant chunks plus reconstructed tool-call start/completion
-        notifications; merely restoring server-side state makes Hermes remember
-        context, but leaves the editor looking like a clean thread.
+        as user/assistant chunks, thinking-mode thought chunks, plus reconstructed
+        tool-call start/completion notifications; merely restoring server-side
+        state makes Hermes remember context, but leaves the editor looking like a
+        clean thread.
         """
         if not self._conn or not state.history:
             return
@@ -882,24 +918,37 @@ class HermesACPAgent(acp.Agent):
         for message in state.history:
             role = str(message.get("role") or "")
 
-            if role in {"user", "assistant"}:
+            if role == "user":
+                text = self._history_message_text(message)
+                if text:
+                    update = self._history_message_update(role=role, text=text)
+                    if update is not None and not await _send(update):
+                        return
+                continue
+
+            if role == "assistant":
+                thought = self._history_reasoning_text(message)
+                if thought and not await _send(self._history_thought_update(thought)):
+                    return
+
                 text = self._history_message_text(message)
                 if text:
                     update = self._history_message_update(role=role, text=text)
                     if update is not None and not await _send(update):
                         return
 
-            if role == "assistant" and isinstance(message.get("tool_calls"), list):
-                for tool_call in message["tool_calls"]:
-                    if not isinstance(tool_call, dict):
-                        continue
-                    tool_call_id = self._history_tool_call_id(tool_call)
-                    if not tool_call_id:
-                        continue
-                    tool_name, args = self._history_tool_call_name_args(tool_call)
-                    active_tool_calls[tool_call_id] = (tool_name, args)
-                    if not await _send(build_tool_start(tool_call_id, tool_name, args)):
-                        return
+                tool_calls = message.get("tool_calls")
+                if isinstance(tool_calls, list):
+                    for tool_call in tool_calls:
+                        if not isinstance(tool_call, dict):
+                            continue
+                        tool_call_id = self._history_tool_call_id(tool_call)
+                        if not tool_call_id:
+                            continue
+                        tool_name, args = self._history_tool_call_name_args(tool_call)
+                        active_tool_calls[tool_call_id] = (tool_name, args)
+                        if not await _send(build_tool_start(tool_call_id, tool_name, args)):
+                            return
                 continue
 
             if role == "tool":
diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py
index 511d6e00934..a7bd0367b67 100644
--- a/tests/acp/test_server.py
+++ b/tests/acp/test_server.py
@@ -13,6 +13,7 @@ from acp.schema import (
     AgentCapabilities,
     AgentMessageChunk,
     AgentPlanUpdate,
+    AgentThoughtChunk,
     AuthenticateResponse,
     AvailableCommandsUpdate,
     Implementation,
@@ -466,6 +467,211 @@ class TestSessionOps:
             for update in updates
         )
 
+    @pytest.mark.asyncio
+    async def test_load_session_replays_reasoning_thought_before_message(self, agent):
+        """Thinking-model thoughts must be replayed via ``agent_thought_chunk``.
+
+        Regression for #12285 — when a session is loaded, persisted assistant
+        ``reasoning_content`` / ``reasoning`` fields must surface as ACP
+        ``AgentThoughtChunk`` notifications in the same relative position they
+        had live (thought streams before the assistant message text), so Zed's
+        collapsed Thinking pane rebuilds instead of vanishing on reconnect.
+        """
+        mock_conn = MagicMock(spec=acp.Client)
+        mock_conn.session_update = AsyncMock()
+        agent._conn = mock_conn
+
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [
+            {"role": "user", "content": "Walk me through it."},
+            {
+                "role": "assistant",
+                "reasoning_content": "Let me think step by step about the request.",
+                "content": "Here is the plan.",
+            },
+            {"role": "user", "content": "And the legacy case?"},
+            {
+                "role": "assistant",
+                # No reasoning_content — exercise the legacy "reasoning" fallback
+                # path so sessions persisted before #16892 still replay thoughts.
+                "reasoning": "Older sessions stored the trace under the internal key.",
+                "content": "Same idea, older field name.",
+            },
+        ]
+
+        mock_conn.session_update.reset_mock()
+        resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+
+        assert isinstance(resp, LoadSessionResponse)
+
+        replay_kinds = [
+            getattr(call.kwargs.get("update"), "session_update", None)
+            for call in mock_conn.session_update.await_args_list
+            if getattr(call.kwargs.get("update"), "session_update", None)
+            in {"user_message_chunk", "agent_message_chunk", "agent_thought_chunk"}
+        ]
+        assert replay_kinds == [
+            "user_message_chunk",
+            "agent_thought_chunk",
+            "agent_message_chunk",
+            "user_message_chunk",
+            "agent_thought_chunk",
+            "agent_message_chunk",
+        ]
+
+        thought_updates = [
+            call.kwargs["update"]
+            for call in mock_conn.session_update.await_args_list
+            if isinstance(call.kwargs.get("update"), AgentThoughtChunk)
+        ]
+        assert len(thought_updates) == 2
+        assert thought_updates[0].content.text == "Let me think step by step about the request."
+        assert thought_updates[1].content.text == "Older sessions stored the trace under the internal key."
+
+    @pytest.mark.asyncio
+    async def test_load_session_replays_reasoning_only_turn(self, agent):
+        """Assistant turns with reasoning but no content should still emit a thought.
+
+        Pure reasoning-only assistant entries (e.g. a thinking step before a
+        tool-call turn) commonly carry ``reasoning_content`` with empty
+        ``content``. The replay must still surface the thought so the editor's
+        Thinking pane rebuilds, even when there is no message text to follow.
+        """
+        mock_conn = MagicMock(spec=acp.Client)
+        mock_conn.session_update = AsyncMock()
+        agent._conn = mock_conn
+
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [
+            {
+                "role": "assistant",
+                "reasoning_content": "I should call the search tool next.",
+                "content": "",
+            },
+        ]
+
+        mock_conn.session_update.reset_mock()
+        await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+
+        thought_updates = [
+            call.kwargs["update"]
+            for call in mock_conn.session_update.await_args_list
+            if isinstance(call.kwargs.get("update"), AgentThoughtChunk)
+        ]
+        message_updates = [
+            call.kwargs["update"]
+            for call in mock_conn.session_update.await_args_list
+            if isinstance(call.kwargs.get("update"), AgentMessageChunk)
+        ]
+        assert len(thought_updates) == 1
+        assert thought_updates[0].content.text == "I should call the search tool next."
+        assert message_updates == []
+
+    @pytest.mark.asyncio
+    async def test_load_session_skips_empty_reasoning_fields(self, agent):
+        """Empty/whitespace reasoning fields must not produce notifications."""
+        mock_conn = MagicMock(spec=acp.Client)
+        mock_conn.session_update = AsyncMock()
+        agent._conn = mock_conn
+
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [
+            {
+                "role": "assistant",
+                "reasoning_content": "",
+                "reasoning": "   \n\t",
+                "content": "Just a regular answer.",
+            },
+        ]
+
+        mock_conn.session_update.reset_mock()
+        await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+
+        thought_updates = [
+            call.kwargs["update"]
+            for call in mock_conn.session_update.await_args_list
+            if isinstance(call.kwargs.get("update"), AgentThoughtChunk)
+        ]
+        assert thought_updates == []
+
+    @pytest.mark.asyncio
+    async def test_load_session_replays_thought_then_tool_call_without_message(self, agent):
+        """Canonical thinking-model shape: reasoning + tool_call + no body text.
+
+        Thinking models commonly emit a pre-tool thought followed by a
+        tool_calls turn with empty ``content``. Replay must emit:
+        ``agent_thought_chunk`` then ``tool_call`` then ``tool_call_update``
+        for the matching tool result — and crucially, NO ``agent_message_chunk``
+        for the empty-text assistant body. Regression for the canonical
+        thinking-then-tool flow on #12285.
+        """
+        mock_conn = MagicMock(spec=acp.Client)
+        mock_conn.session_update = AsyncMock()
+        agent._conn = mock_conn
+
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [
+            {"role": "user", "content": "Find the bug."},
+            {
+                "role": "assistant",
+                "reasoning_content": "I should grep for the function name first.",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_grep_1",
+                        "type": "function",
+                        "function": {
+                            "name": "search_files",
+                            "arguments": '{"pattern":"foo","path":"."}',
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_grep_1",
+                "content": '{"total_count":1,"matches":[{"path":"x.py","line":1,"content":"foo"}]}',
+            },
+        ]
+
+        mock_conn.session_update.reset_mock()
+        await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+
+        kinds = [
+            getattr(call.kwargs.get("update"), "session_update", None)
+            for call in mock_conn.session_update.await_args_list
+            if getattr(call.kwargs.get("update"), "session_update", None)
+            in {
+                "user_message_chunk",
+                "agent_thought_chunk",
+                "agent_message_chunk",
+                "tool_call",
+                "tool_call_update",
+            }
+        ]
+        # No agent_message_chunk for the empty-content assistant turn.
+        assert "agent_message_chunk" not in kinds
+        # Thought must precede the tool_call_start within the assistant turn,
+        # and the tool result follows.
+        assert kinds == [
+            "user_message_chunk",
+            "agent_thought_chunk",
+            "tool_call",
+            "tool_call_update",
+        ]
+
     @pytest.mark.asyncio
     async def test_load_session_schedules_history_replay_after_response(self, agent):
         """Zed only attaches replayed updates after session/load has completed."""

From 3034eee38ec516109566c00975be4d0276747c34 Mon Sep 17 00:00:00 2001
From: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sat, 16 May 2026 07:41:34 -0700
Subject: [PATCH 133/218] fix(acp): replay session history before responding to
 session/load (#12285 follow-up) (#26957)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switches `_replay_session_history` from `loop.call_soon`-deferred (after the
`LoadSessionResponse` is written) to `await`-inline (before the response is
constructed) for both `session/load` and `session/resume`. Adds defensive
try/except around the awaited call so a replay helper crash still yields a
successful load response — partial transcripts are acceptable, total
load failure is not.

The deferral was added on May 2 in commit 19854c7cd with the rationale "Zed
only attaches streamed transcript/tool updates once the load/resume response
has completed." That justification was incorrect:

- Zed's current ACP integration (zed-industries/zed
  crates/agent_servers/src/acp.rs) explicitly registers the session-update
  routing entry BEFORE awaiting the loadSession RPC, with the comment:
  "so that any session/update notifications that arrive during the call
  (e.g. history replay during session/load) can find the thread."
- Every other reference ACP server (Codex, Claude Code, OpenCode, Pi, agentao)
  replays history BEFORE responding to the load request.
- The ACP spec wording ("Stream the entire conversation history back to the
  client via notifications") and the natural JSON-RPC reading both mean
  "during the request's lifetime", not "after the response resolves".

Empirical reproduction (reported by Biraj on @agentclientprotocol/sdk
v0.21.1): the same custom ACP client works correctly against Codex /
Claude Code / OpenCode / Pi but receives 0 notifications from Hermes
because it measures the per-call notification count at the moment
`loadSession` resolves — which on Hermes was before the `call_soon`-
scheduled replay coroutine had a chance to run.

Changes:
- `acp_adapter/server.py`: remove `_schedule_history_replay`; both
  `load_session` and `resume_session` now `await self._replay_session_history`
  before returning, wrapped in try/except that logs and continues on
  helper exceptions.
- `tests/acp/test_server.py`: replace the single
  `test_load_session_schedules_history_replay_after_response`
  (which encoded the now-incorrect post-response ordering) with two tests
  asserting `events == ["replay", "returned"]` for load and resume.
  Add two regression tests confirming that a replay helper raising still
  yields a `LoadSessionResponse` / `ResumeSessionResponse` rather than
  propagating the exception out as a JSON-RPC error.

Result: 240 ACP tests pass (was 238), ruff clean. Verified end-to-end:
biraj's synchronous notification-counter pattern now sees 6 notifications
during `loadSession` for a 5-message session, matching all other reference
ACP servers.

The `_fenced_text` change in `acp_adapter/tools.py` from the same May 2
commit is orthogonal and intentionally left intact — it's a separate,
still-valid fix for Zed's pipe-as-table rendering.

Refs #12285. Follows up #26943 (which added thought-chunk replay but kept
the deferral).
---
 acp_adapter/server.py    | 67 ++++++++++++++++++++++----------
 tests/acp/test_server.py | 84 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 121 insertions(+), 30 deletions(-)

diff --git a/acp_adapter/server.py b/acp_adapter/server.py
index 9c8bb416999..3031de161fd 100644
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -889,14 +889,17 @@ class HermesACPAgent(acp.Agent):
         ).strip()
 
     async def _replay_session_history(self, state: SessionState) -> None:
-        """Send persisted user/assistant history to clients during session/load.
+        """Replay persisted user/assistant history during session/load or session/resume.
 
-        Zed's ACP history UI calls ``session/load`` after the user picks an item
-        from the Agents sidebar. The agent must then replay the full conversation
-        as user/assistant chunks, thinking-mode thought chunks, plus reconstructed
-        tool-call start/completion notifications; merely restoring server-side
-        state makes Hermes remember context, but leaves the editor looking like a
-        clean thread.
+        Invoked inline (``await``) from both ``load_session`` and
+        ``resume_session`` so that spec-compliant ACP clients receive the
+        full transcript within the request's lifetime — see the comment at
+        the call sites for the rationale and prior-art citations.
+
+        Replays the conversation as user/assistant chunks, thinking-mode
+        thought chunks, plus reconstructed tool-call start/completion
+        notifications. Merely restoring server-side state makes Hermes
+        remember context, but leaves the editor looking like a clean thread.
         """
         if not self._conn or not state.history:
             return
@@ -991,18 +994,6 @@ class HermesACPAgent(acp.Agent):
             models=self._build_model_state(state),
         )
 
-    def _schedule_history_replay(self, state: SessionState) -> None:
-        """Replay persisted history after session/load or session/resume returns.
-
-        Zed only attaches streamed transcript/tool updates once the load/resume
-        response has completed. Sending replay notifications while the request is
-        still in-flight can make the server look correct in logs while the editor
-        drops or fails to attach the tool-call history.
-        """
-        loop = asyncio.get_running_loop()
-        replay_coro = self._replay_session_history(state)
-        loop.call_soon(asyncio.create_task, replay_coro)
-
     async def load_session(
         self,
         cwd: str,
@@ -1016,7 +1007,30 @@ class HermesACPAgent(acp.Agent):
             return None
         await self._register_session_mcp_servers(state, mcp_servers)
         logger.info("Loaded session %s", session_id)
-        self._schedule_history_replay(state)
+        # Per ACP spec, `session/load` must stream the prior conversation back
+        # to the client via `session/update` notifications BEFORE responding,
+        # so the client receives the full transcript within the load request's
+        # lifetime. Awaiting the replay here matches Codex / Claude Code /
+        # OpenCode / Pi and the Zed client (which registers the session-update
+        # routing entry before awaiting the loadSession RPC specifically so
+        # in-call history replay updates can find the thread). Deferring this
+        # via `loop.call_soon` (as we did briefly in May 2026) broke every
+        # spec-compliant ACP client that measures notifications synchronously
+        # against the load response — see #12285 follow-up.
+        try:
+            await self._replay_session_history(state)
+        except Exception:
+            # Replay is best-effort — a corrupted or unexpected message shape
+            # must not turn a successful session/load into a JSON-RPC error
+            # response. Per-notification failures are already caught inside
+            # ``_replay_session_history``; this outer guard covers anything
+            # raised by the helpers themselves before reaching ``_send``.
+            logger.warning(
+                "ACP history replay raised during session/load for %s — "
+                "load will still succeed, partial transcript may be missing",
+                session_id,
+                exc_info=True,
+            )
         self._schedule_available_commands_update(session_id)
         self._schedule_usage_update(state)
         return LoadSessionResponse(models=self._build_model_state(state))
@@ -1034,7 +1048,18 @@ class HermesACPAgent(acp.Agent):
             state = self.session_manager.create_session(cwd=cwd)
         await self._register_session_mcp_servers(state, mcp_servers)
         logger.info("Resumed session %s", state.session_id)
-        self._schedule_history_replay(state)
+        # See `load_session` above for the spec rationale — replay must
+        # complete before the response so clients receive the full transcript
+        # within the request's lifetime.
+        try:
+            await self._replay_session_history(state)
+        except Exception:
+            logger.warning(
+                "ACP history replay raised during session/resume for %s — "
+                "resume will still succeed, partial transcript may be missing",
+                state.session_id,
+                exc_info=True,
+            )
         self._schedule_available_commands_update(state.session_id)
         self._schedule_usage_update(state)
         return ResumeSessionResponse(models=self._build_model_state(state))
diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py
index a7bd0367b67..65dd6fd6b72 100644
--- a/tests/acp/test_server.py
+++ b/tests/acp/test_server.py
@@ -673,25 +673,91 @@ class TestSessionOps:
         ]
 
     @pytest.mark.asyncio
-    async def test_load_session_schedules_history_replay_after_response(self, agent):
-        """Zed only attaches replayed updates after session/load has completed."""
+    async def test_load_session_replays_history_before_returning_response(self, agent):
+        """Per ACP spec, replay must complete BEFORE load_session returns.
+
+        Spec-compliant ACP clients (Codex, Claude Code, OpenCode, Pi, Zed)
+        attach their ``session/update`` listeners before awaiting the
+        ``loadSession`` RPC and rely on receiving the full transcript within
+        the request's lifetime. Deferring replay via ``loop.call_soon`` (the
+        prior behavior in May 2026) broke clients that read notification
+        counts synchronously against the load response — see #12285 follow-up.
+        """
         new_resp = await agent.new_session(cwd="/tmp")
         state = agent.session_manager.get_session(new_resp.session_id)
         state.history = [{"role": "user", "content": "hello from history"}]
-        events = []
+        events: list[str] = []
 
-        async def replay_after_response(_state):
+        async def replay_records(_state):
             events.append("replay")
 
-        with patch.object(agent, "_replay_session_history", side_effect=replay_after_response):
+        with patch.object(agent, "_replay_session_history", side_effect=replay_records):
             resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
             events.append("returned")
 
         assert isinstance(resp, LoadSessionResponse)
-        assert events == ["returned"]
-        await asyncio.sleep(0)
-        await asyncio.sleep(0)
-        assert events == ["returned", "replay"]
+        # Replay must have happened BEFORE the response was constructed —
+        # i.e. before the `events.append("returned")` after the await resolves.
+        assert events == ["replay", "returned"]
+
+    @pytest.mark.asyncio
+    async def test_resume_session_replays_history_before_returning_response(self, agent):
+        """Same spec rationale as ``load_session`` — replay before responding."""
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [{"role": "user", "content": "hello from history"}]
+        events: list[str] = []
+
+        async def replay_records(_state):
+            events.append("replay")
+
+        with patch.object(agent, "_replay_session_history", side_effect=replay_records):
+            resp = await agent.resume_session(cwd="/tmp", session_id=new_resp.session_id)
+            events.append("returned")
+
+        assert isinstance(resp, ResumeSessionResponse)
+        assert events == ["replay", "returned"]
+
+    @pytest.mark.asyncio
+    async def test_load_session_survives_replay_helper_exception(self, agent, caplog):
+        """A replay helper raising must not turn load_session into an error.
+
+        With awaited replay, an exception in ``_replay_session_history`` now
+        propagates into the ``load_session`` handler. The defensive try/except
+        guard at the call site must catch and log it so the JSON-RPC client
+        still receives a ``LoadSessionResponse`` — partial transcripts are
+        acceptable, total load failure is not.
+        """
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [{"role": "user", "content": "hi"}]
+
+        async def boom(_state):
+            raise RuntimeError("simulated replay helper crash")
+
+        with caplog.at_level("WARNING", logger="acp_adapter.server"):
+            with patch.object(agent, "_replay_session_history", side_effect=boom):
+                resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+
+        assert isinstance(resp, LoadSessionResponse)
+        assert "history replay raised during session/load" in caplog.text
+
+    @pytest.mark.asyncio
+    async def test_resume_session_survives_replay_helper_exception(self, agent, caplog):
+        """Same guarantee as ``load_session`` for the resume path."""
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [{"role": "user", "content": "hi"}]
+
+        async def boom(_state):
+            raise RuntimeError("simulated replay helper crash")
+
+        with caplog.at_level("WARNING", logger="acp_adapter.server"):
+            with patch.object(agent, "_replay_session_history", side_effect=boom):
+                resp = await agent.resume_session(cwd="/tmp", session_id=new_resp.session_id)
+
+        assert isinstance(resp, ResumeSessionResponse)
+        assert "history replay raised during session/resume" in caplog.text
 
     @pytest.mark.asyncio
     async def test_resume_session_creates_new_if_missing(self, agent):

From 683698742852ce0455f3a07b12c772c786d5a2ae Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 11:18:06 -0700
Subject: [PATCH 134/218] docs(release): rewrite v0.14.0 highlights for
 excitement framing (#27035)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* chore: release v0.14.0 (2026.5.16)

The Foundation Release — Hermes installs and runs anywhere now.

Highlights:
- Native Windows support (early beta) — PowerShell installer, native subprocess/PTY paths, ~40 follow-up Windows-only fixes
- pip install hermes-agent — PyPI wheel
- Cold-start wave — ~19s off hermes launch, 180x faster browser_console (CDP WS)
- Supply-chain advisory checker + lazy-deps + tiered install fallback
- OpenAI-compatible local proxy for OAuth providers (Claude Pro, ChatGPT Pro, SuperGrok)
- Cross-session 1h Claude prompt cache (Anthropic / OpenRouter / Nous Portal)
- 2 new platforms: LINE + SimpleX Chat (22 total)
- Microsoft Graph foundation — Teams pipeline + webhook adapter
- /handoff actually transfers sessions live
- x_search first-class tool, vision_analyze pixel passthrough
- LSP semantic diagnostics on every write
- Unified video_generate with pluggable backends
- computer_use cua-driver backend
- 9 new optional skills, OpenRouter Pareto Code router, xAI Grok OAuth
- 12 P0 + 50 P1 closures

808 commits · 633 PRs · 1393 files · 165k insertions · 545 issues closed · 215 contributors

* docs(release): rewrite v0.14.0 highlights for excitement framing

Demote Windows beta from headline; lead with SuperGrok / OAuth proxy /
x_search / Microsoft Teams. Frame lazy-deps as a debloating wave that
makes installs dramatically lighter. Add highlights for clickable URLs
in any terminal, dangerous-command detection bypasses, ChatGPT Pro
and SuperGrok via the local proxy. Tighten the summary paragraph.
---
 RELEASE_v0.14.0.md | 67 +++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/RELEASE_v0.14.0.md b/RELEASE_v0.14.0.md
index 38d40db8c69..9a5ee4ba8c8 100644
--- a/RELEASE_v0.14.0.md
+++ b/RELEASE_v0.14.0.md
@@ -3,74 +3,79 @@
 **Release Date:** May 16, 2026
 **Since v0.13.0:** 808 commits · 633 merged PRs · 1393 files changed · 165,061 insertions · 545 issues closed (12 P0, 50 P1) · 215 community contributors (including co-authors)
 
-> The Foundation Release — Hermes Agent installs and runs anywhere now. Native Windows ships in early beta with a full PowerShell installer story, a `pip install hermes-agent` wheel lands on PyPI, lazy-deps reshape what `pip install hermes-agent` actually pulls down, the supply-chain checker scans every install/upgrade for unsafe versions, and a new OpenAI-compatible local proxy lets Codex / Aider / Cline talk to OAuth-only providers (Claude Pro, ChatGPT Pro, SuperGrok). The cold-start wave shaves ~19 seconds off `hermes` launch, browser-tool CDP calls run 180x faster, and `hermes tools` All-Platforms drops from 14s to under 1.5s. Two new messaging platforms (LINE and SimpleX Chat) and a Microsoft Graph foundation (Teams pipeline + webhook adapter) land alongside `/handoff` that finally transfers sessions live, `vision_analyze` passing pixels through to vision-capable models, `x_search` as a first-class tool, LSP semantic diagnostics on every `write_file` / `patch`, a unified pluggable `video_generate`, a `computer_use` cua-driver backend, cross-session 1-hour Claude prompt caching, a per-turn file-mutation verifier, plus 9 new optional skills. 50+ P1 closures, 12 P0 closures.
+> The Foundation Release — Hermes installs and runs anywhere, ships with the things you actually want to use, and stops shipping the things you don't. xAI Grok lands as a SuperGrok OAuth provider with grok-4.3 bumped to a 1M context window. A new OpenAI-compatible local proxy turns any OAuth-authed Hermes provider — Claude Pro, ChatGPT Pro, SuperGrok — into an endpoint that Codex / Aider / Cline / Continue can hit. `x_search` lands as a first-class X (Twitter) search tool with OAuth-or-API-key auth. The Microsoft Teams stack is wired end-to-end (Graph auth + webhook listener + pipeline runtime + outbound delivery). A debloating wave makes installs dramatically lighter — heavyweight backends now lazy-install on first use, the `[all]` extras drop everything covered by lazy-deps, and a tiered install falls back when a wheel rejects on your platform. `pip install hermes-agent` works from PyPI. The cold-start wave shaves ~19 seconds off `hermes` launch. Browser CDP calls are 180x faster. Two new messaging platforms (LINE + SimpleX Chat) bring the total to 22. Cross-session 1-hour Claude prompt caching, `/handoff` that actually transfers sessions live, native button UI for `clarify` on Telegram and Discord, Discord channel history backfill, LSP semantic diagnostics on every write, a unified pluggable `video_generate`, a `computer_use` cua-driver backend that finally works with non-Anthropic providers, clickable URLs in any terminal, Zed ACP Registry integration via `uvx`, native Windows beta, 9 new optional skills, OpenRouter Pareto Code router, huggingface/skills as a trusted default tap. 12 P0 + 50 P1 closures.
 
 ---
 
 ## ✨ Highlights
 
-- **Native Windows support (early beta)** — full PowerShell installer, native subprocess/PTY paths, taskkill-based process management, MinGit auto-install, Microsoft Store python stub detection, foreground Ctrl+C preservation, taskkill+ps2 fallback, npm prefix handling, and ~40 follow-up Windows-only fixes across CLI / gateway / TUI / curator / tools. Hermes finally runs natively on `cmd.exe` and PowerShell, no WSL required. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561), [#22130](https://github.com/NousResearch/hermes-agent/pull/22130), [#22752](https://github.com/NousResearch/hermes-agent/pull/22752), [#26618](https://github.com/NousResearch/hermes-agent/pull/26618), and many more)
+- **xAI Grok via SuperGrok OAuth — and grok-4.3 jumps to a 1M context window** — Sign in with your xAI account, talk to Grok models from Hermes. The OAuth provider ships with proper entitlement-error handling, prelude-SSE recovery, and reasoning replay gating. grok-4.3's context bumps to 1M as part of the wire-through. Includes an SSH-to-tunnel docs page for remote-machine OAuth flows. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534), [#26664](https://github.com/NousResearch/hermes-agent/pull/26664), [#26644](https://github.com/NousResearch/hermes-agent/pull/26644), [#26592](https://github.com/NousResearch/hermes-agent/pull/26592))
 
-- **`pip install hermes-agent && hermes`** — Hermes Agent is now a real PyPI package. One command, no clone, no git, no shell installer. Wheel includes the Ink TUI bundle and shell launcher. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
+- **OpenAI-compatible local proxy for OAuth providers** — `hermes proxy` exposes any OAuth-authed Hermes provider — Claude Pro, ChatGPT Pro, SuperGrok — as an OpenAI-compatible HTTP endpoint. Point Codex / Aider / Cline / Continue at `http://localhost:port` and your subscription works in any OpenAI-compatible tool. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
 
-- **Cold-start performance wave — ~19s off `hermes` launch** — skills cache, lazy Feishu import, no Nous HTTP at startup, plus PEP-562 lazy adapter imports (QQ, Yuanbao, Teams, Google Chat), deferred `fal_client` / `google-cloud` / `httpx` loads, models.dev disk-cache-first lookup, parallel doctor API checks, eager-skip plugin discovery on built-in subcommands, `hermes tools` All-Platforms drops from 14s to <1.5s, welcome banner skipped on `chat -q`. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
+- **`x_search` — first-class X (Twitter) search tool** — gated tool with OAuth-or-API-key auth, no skill required. Search the timeline, find threads, surface posts — straight from the agent. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
+
+- **Microsoft Teams — end-to-end** — Hermes can now read and post to Teams. The full Microsoft Graph foundation lands together: auth + client foundation, webhook listener platform, Teams pipeline plugin runtime, and Teams outbound delivery via the existing adapter. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
+
+- **Debloating wave — lighter installs, less you don't need** — the heavy backends (Slack / Matrix / Feishu / DingTalk adapters, hindsight client, codex app-server, Pixverse / Camofox / image-gen SDKs, voice/TTS providers) now lazy-install on first use. The `[all]` extras drop everything covered by lazy-deps so a default install ships with vastly fewer wheels. A tiered install falls back through extras tiers when one rejects on the target platform. The supply-chain advisory checker scans every install/upgrade against an advisory list. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220), [#24515](https://github.com/NousResearch/hermes-agent/pull/24515), [#25014](https://github.com/NousResearch/hermes-agent/pull/25014), [#25038](https://github.com/NousResearch/hermes-agent/pull/25038), [#25766](https://github.com/NousResearch/hermes-agent/pull/25766), [#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
+
+- **`pip install hermes-agent && hermes`** — Hermes Agent is a real PyPI package now. One command, no clone, no git, no shell installer. The wheel ships with the Ink TUI bundle and the shell launcher. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593), [#26148](https://github.com/NousResearch/hermes-agent/pull/26148))
+
+- **Cross-session 1h Claude prompt cache** — Anthropic, OpenRouter, and Nous Portal now share a 1-hour prefix cache across sessions for Claude models. `/new` and `/resume` hit cache. Background memory review hits cache. The system prompt is now byte-static within a session so cache layout never invalidates mid-conversation. Lower cost, faster first token. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828), [#25434](https://github.com/NousResearch/hermes-agent/pull/25434), [#24778](https://github.com/NousResearch/hermes-agent/pull/24778))
 
 - **180x faster `browser_console` evaluations** — routed through the supervisor's persistent CDP WebSocket instead of spawning a fresh DevTools session per call. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
 
-- **Supply-chain advisory checker + lazy-deps framework + tiered install fallback** — every `pip install` / `hermes update` scans dependencies against an advisory list, lazy-deps replace heavy import-time loads with first-use installs, and the installer falls back through extras tiers when a wheel rejects on the target platform. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
+- **Cold-start performance wave — ~19s off `hermes` launch** — skills cache + lazy Feishu + no Nous HTTP at startup, plus PEP-562 lazy adapter imports (QQ, Yuanbao, Teams, Google Chat), deferred `fal_client` / `google-cloud` / `httpx` loads, models.dev disk-cache-first lookup, parallel doctor API checks, eager-skip plugin discovery on built-in subcommands. `hermes tools` All-Platforms drops from 14s to <1.5s. Welcome banner skipped on `chat -q`. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
 
-- **OpenAI-compatible local proxy** — `hermes proxy` exposes any OAuth-authed provider (Claude Pro, ChatGPT Pro, SuperGrok) as an OpenAI-compatible endpoint that Codex / Aider / Cline / VS Code Continue can hit. Your subscription, your tools. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
-
-- **Cross-session 1-hour Claude prompt cache** — Anthropic / OpenRouter / Nous Portal now share a 1h prefix cache across sessions for Claude models. Fast resume, fast `/new`, lower cost on repeat work. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828))
-
-- **Two new messaging platforms — LINE + SimpleX Chat** — LINE Messaging API lands as a first-class platform, SimpleX Chat salvages #2558 onto the modern adapter spec. Hermes is now on 22 platforms. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
-
-- **Microsoft Graph foundation — Teams pipeline + webhook adapter** — `msgraph` auth/client foundation, webhook listener platform, Teams pipeline plugin runtime, and Teams outbound delivery via the existing adapter — Hermes can now read and post to Teams. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
+- **Two new messaging platforms — LINE + SimpleX Chat** — LINE Messaging API as a first-class platform, SimpleX Chat salvages #2558 onto the modern adapter spec. Hermes is now on 22 platforms. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
 
 - **`/handoff` actually transfers the session live** — the agent's active session moves to a different model / persona / profile mid-conversation, with messages, tool history, and context preserved. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
 
-- **`x_search` — first-class X (Twitter) search tool** — gated tool with OAuth-or-API-key auth, no skill needed to query the timeline. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
+- **Native button UI for `clarify` on Telegram and Discord** — the `clarify` tool renders multi-choice prompts as platform-native inline keyboards on Telegram and buttons on Discord instead of typed responses. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
+
+- **Discord channel history backfill (default on)** — Hermes reads recent channel history when joining a thread so it actually knows what's already been said before responding. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
 
 - **`vision_analyze` returns pixels to vision-capable models** — when the active model can see, `vision_analyze` now hands the image straight through instead of falling back to a text description. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
 
-- **LSP semantic diagnostics on every write** — `write_file` and `patch` now run real language-server diagnostics on the post-edit file (delta-only) and surface real errors before they ship downstream. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
-
 - **Per-turn file-mutation verifier footer** — after every turn that wrote files, the agent gets a verifier footer summarizing what actually changed on disk — catches silent overwrites and "wrote it but it didn't land" bugs. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
 
+- **LSP semantic diagnostics on every write** — `write_file` and `patch` run real language-server diagnostics on the post-edit file (delta-only) and surface real errors before they ship downstream. Goes well beyond the in-proc Python/JSON/YAML/TOML linters from v0.13.0. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
+
 - **Unified `video_generate` with pluggable provider backends** — single tool, any backend. Drop in a new video provider as a plugin, no core changes. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
 
-- **`computer_use` cua-driver backend** — proper focus-safe ops, non-Anthropic provider support, refresh on `hermes update`. Computer-use is no longer locked to a single SDK. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
+- **`computer_use` cua-driver backend — works with non-Anthropic models now** — proper focus-safe ops, non-Anthropic provider support, refresh on `hermes update`. Computer-use is no longer locked to a single SDK. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
 
-- **xAI Grok OAuth provider — SuperGrok via subscription** — sign in with your xAI account, talk to Grok models from Hermes. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534))
+- **Clickable URLs in any terminal** — links in agent output now render as proper OSC8 hyperlinks with hover-highlight, in any terminal that supports them. Click to open. (@OutThisLife) ([#25071](https://github.com/NousResearch/hermes-agent/pull/25071), [#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
 
-- **Clarify with buttons — native inline keyboards on Telegram + Discord** — the `clarify` tool renders multi-choice prompts as platform-native buttons instead of typed responses. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
+- **Zed ACP Registry — `uvx` install in one click** — Hermes is in the Zed registry, installable via `uvx` (no npm needed). Plus `hermes acp --setup-browser` bootstraps browser tools for registry installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
 
-- **Discord channel history backfill (default on)** — Hermes reads recent channel history when joining a thread so it actually knows what's been said. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
+- **OpenRouter Pareto Code router with `min_coding_score` knob** — pick the cheapest model that meets your quality bar. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
 
-- **Watchers skill — RSS / HTTP JSON / GitHub polling via cron `no_agent` mode** — skill recipes that wire change-detection sources directly into cron's script-only watchdog mode. ([#21881](https://github.com/NousResearch/hermes-agent/pull/21881))
+- **NovitaAI as a new model provider** (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
 
-- **Zed ACP Registry integration + uvx distribution** — Hermes is in the Zed registry, installable via `uvx` (no npm). Plus `hermes acp --setup-browser` bootstraps browser tools for registry installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
+- **Codex app-server runtime for OpenAI/Codex models** — optional runtime that drives OpenAI's Codex CLI under the hood for OpenAI/Codex paths, with session reuse, wedge retirement, and OAuth refresh classification. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
 
-- **OpenRouter Pareto Code router** — wire a new OpenRouter router with `min_coding_score` knob. Pick the cheapest model that meets your quality bar. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
+- **`huggingface/skills` as a trusted default tap** — community skills index from huggingface.co/skills is available by default in the Skills Hub. (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
 
-- **Optional codex app-server runtime for OpenAI/Codex models** — drives the OpenAI Codex CLI under the hood for OpenAI/Codex paths, with session reuse, wedge retirement, and OAuth refresh classification. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
-
-- **`hermes-skills/huggingface` as a trusted default tap** — community skills index from huggingface.co/skills is available by default in the Skills Hub. ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
-
-- **9 new optional skills** — Hyperliquid (perp/spot trading via SDK + REST) (@kshitijk4poor & Hermes), Yahoo Finance market data, api-testing (REST/GraphQL debug), unified EVM multi-chain skill (folds #25291 + #2010 + base/), darwinian-evolver, osint-investigation (closes #355), pinggy-tunnel, watchers (RSS/HTTP/GitHub via cron), Notion overhaul for the Developer Platform (May 2026). ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
+- **9 new optional skills** — Hyperliquid (perp/spot trading via SDK + REST), Yahoo Finance market data, api-testing (REST/GraphQL debug), unified EVM multi-chain (folds #25291 + #2010 + base/), darwinian-evolver, osint-investigation (closes #355), pinggy-tunnel, watchers (RSS/HTTP/GitHub via cron `no_agent`), Notion overhaul for the May 2026 Developer Platform. ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
 
 - **API server exposes run approval events** — long-running runs surface approval requests over the API stream, no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))
 
-- **`/subgoal` — user-added criteria appended to active `/goal`** — layer extra success criteria onto a running goal loop. The judge sees them in the prompt, no behavior change when subgoals are empty. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
-
-- **Plugins can run any LLM call via `ctx.llm`** — plugins get a first-class hook to make their own LLM requests through the active provider/credentials, no manual wiring. Plus `tool_override` flag for replacing built-in tools. ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
+- **Plugins can run any LLM call via `ctx.llm` + `tool_override`** — plugins get a first-class hook to make their own LLM requests through the active provider/credentials, plus a `tool_override` flag for replacing built-in tools (closes #11049). ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
 
 - **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — two new free search backends alongside Tavily / SearXNG / Exa. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
 
-- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS classification** — closes the `sudo -S` brute-force avenue; approval gates classify stdin-fed and askpass-stripped sudo invocations as dangerous. (salvages of #22194 + #21128) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
+- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS classification** — closes the `sudo -S` brute-force avenue; approval gates classify stdin-fed and askpass-stripped sudo invocations as dangerous. (salvages of #22194 + #21128) (@kshitijk4poor) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
+
+- **Closes 3 dangerous-command detection bypasses + sanitizes tool error strings before injection** — inspired by Claude Code's command-detection work; closes 3 known bypasses and adds tool-error sanitization to stop adversarial output from re-entering the model context. ([#26829](https://github.com/NousResearch/hermes-agent/pull/26829), [#26823](https://github.com/NousResearch/hermes-agent/pull/26823))
+
+- **`/subgoal` — user-added criteria appended to active `/goal`** — layer extra success criteria onto a running goal loop. The judge sees them in the prompt, no behavior change when subgoals are empty. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
 
 - **Provider rename — Alibaba Cloud → Qwen Cloud, picker reorder** — matches what the world calls it. Existing config keys still work. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
 
+- **Native Windows support (early beta)** — Hermes runs natively on `cmd.exe` and PowerShell. Full PowerShell installer, native subprocess/PTY paths, taskkill-based process management, MinGit auto-install, foreground Ctrl+C preservation, ~40 follow-up Windows-only fixes. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
+
+---
 
 ---
 

From 35f25523c60d9b1174c9a5d901e34f2300d81986 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 11:53:13 -0700
Subject: [PATCH 135/218] docs(tools): add video_generate / video_gen toolset
 to user-facing tool docs (#27050)

The video_gen toolset and its video_generate tool shipped without
user-facing reference docs. toolsets-reference.md and the dev-guide
plugin page were already in, but reference/tools-reference.md had no
video_gen section at all and user-guide/features/tools.md's Media row
didn't list video_generate.

- reference/tools-reference.md: add a video_gen section after video,
  including backend list (xAI Grok-Imagine, FAL.ai Veo/Pixverse/Kling),
  unified text-to-video / image-to-video surface note, link to the
  dev-guide plugin page, and the video_generate tool row. Add
  video_generate to the standalone-tools quick-counts line.
- user-guide/features/tools.md: extend Media row with video_generate
  and video_analyze plus an opt-in caveat.
---
 website/docs/reference/tools-reference.md | 17 ++++++++++++++++-
 website/docs/user-guide/features/tools.md |  2 +-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md
index 507bd307afb..64cf5e2dc09 100644
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@@ -8,7 +8,7 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
 
 This page documents Hermes' built-in tools, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
 
-**Quick counts (current registry):** ~70 tools — 10 browser tools (core) + 2 CDP-gated browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, 7 Spotify tools (registered by the bundled `spotify` plugin), 5 Yuanbao tools, 7 kanban tools (registered when the kanban dispatcher spawns the agent), 2 Discord tools, and a handful of standalone tools (`memory`, `clarify`, `delegate_task`, `execute_code`, `cronjob`, `session_search`, `skill_view`/`skill_manage`/`skills_list`, `text_to_speech`, `image_generate`, `vision_analyze`, `video_analyze`, `mixture_of_agents`, `send_message`, `todo`, `computer_use`, `process`).
+**Quick counts (current registry):** ~70 tools — 10 browser tools (core) + 2 CDP-gated browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, 7 Spotify tools (registered by the bundled `spotify` plugin), 5 Yuanbao tools, 7 kanban tools (registered when the kanban dispatcher spawns the agent), 2 Discord tools, and a handful of standalone tools (`memory`, `clarify`, `delegate_task`, `execute_code`, `cronjob`, `session_search`, `skill_view`/`skill_manage`/`skills_list`, `text_to_speech`, `image_generate`, `video_generate`, `vision_analyze`, `video_analyze`, `mixture_of_agents`, `send_message`, `todo`, `computer_use`, `process`).
 
 :::tip MCP Tools
 In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with the prefix `mcp_<server>_` (e.g., `mcp_github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
@@ -189,6 +189,21 @@ Opt-in toolset (not loaded in the default `hermes-cli` set). Add via `--toolsets
 |------|-------------|----------------------|
 | `video_analyze` | Analyze video content from a URL or file path — captions, scene breakdowns, key timestamps, and visual descriptions. | — |
 
+## `video_gen` toolset
+
+Opt-in toolset (not loaded in the default `hermes-cli` set). Add via `--toolsets video_gen` or enable it in `hermes tools` → Video Generation, which also walks you through picking a backend.
+
+Backends ship as plugins under `plugins/video_gen/<name>/`:
+
+- **xAI Grok-Imagine** — text-to-video and image-to-video (SuperGrok OAuth or `XAI_API_KEY`).
+- **FAL.ai** — Veo 3.1, Pixverse v6, Kling O3 (requires `FAL_KEY`).
+
+The single `video_generate` tool covers both modalities — pass `image_url` to animate a still, omit it to generate from text alone. The active backend auto-routes to the right endpoint. The tool's description is rebuilt at session start to reflect the active backend's actual capabilities (modalities, aspect ratios, resolutions, duration range, max reference images, audio support). See [Video Generation Provider Plugins](/docs/developer-guide/video-gen-provider-plugin) for backend authoring.
+
+| Tool | Description | Requires environment |
+|------|-------------|----------------------|
+| `video_generate` | Generate a video from a text prompt (text-to-video) or animate a still image (image-to-video) using the user's configured video generation backend. Pass `image_url` to animate that image; omit it to generate from text alone. The backend auto-routes to the right endpoint. Returns either an HTTP URL or an absolute file path in the `video` field. | Active `video_gen` plugin + its credential (e.g. `XAI_API_KEY`, `FAL_KEY`) |
+
 ## `web` toolset
 
 | Tool | Description | Requires environment |
diff --git a/website/docs/user-guide/features/tools.md b/website/docs/user-guide/features/tools.md
index 0c5dd30cb2c..ec0d83b81f1 100644
--- a/website/docs/user-guide/features/tools.md
+++ b/website/docs/user-guide/features/tools.md
@@ -24,7 +24,7 @@ High-level categories:
 | **X Search** | `x_search` | Search X (Twitter) posts and threads via xAI's built-in `x_search` Responses tool — gated on xAI credentials (SuperGrok OAuth or `XAI_API_KEY`); off by default, opt in via `hermes tools` → 🐦 X (Twitter) Search. |
 | **Terminal & Files** | `terminal`, `process`, `read_file`, `patch` | Execute commands and manipulate files. |
 | **Browser** | `browser_navigate`, `browser_snapshot`, `browser_vision` | Interactive browser automation with text and vision support. |
-| **Media** | `vision_analyze`, `image_generate`, `text_to_speech` | Multimodal analysis and generation. |
+| **Media** | `vision_analyze`, `image_generate`, `video_generate`, `video_analyze`, `text_to_speech` | Multimodal analysis and generation. `video_generate` and `video_analyze` are opt-in (add `video_gen` / `video` toolsets via `hermes tools` or `--toolsets`). |
 | **Agent orchestration** | `todo`, `clarify`, `execute_code`, `delegate_task` | Planning, clarification, code execution, and subagent delegation. |
 | **Memory & recall** | `memory`, `session_search` | Persistent memory and session search. |
 | **Automation & delivery** | `cronjob`, `send_message` | Scheduled tasks with create/list/update/pause/resume/run/remove actions, plus outbound messaging delivery. |

From 6c2406c5e131dbbcabb69319c73c02594f63caea Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 11:53:57 -0700
Subject: [PATCH 136/218] fix(signal): read groupV2.id in envelope, fall back
 to legacy groupInfo (#27051)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Port from qwibitai/nanoclaw#1962: modern Signal V2-only groups surface on
dataMessage.groupV2.id, not groupInfo.groupId. signal-cli versions differ
in which field they expose for V2 groups — some forward the underlying
libsignal envelope verbatim (groupV2), others normalize everything into
groupInfo. Without a groupV2 read, V2-only groups appear as DMs because
groupInfo is undefined and the adapter misroutes them to the sender's
DM session.

Reads groupV2.id first, falls back to groupInfo.groupId. Also hardens
chat_name extraction against non-dict groupInfo payloads (crashed with
AttributeError under malformed envelopes).

6 new tests cover V2 routing, V1 legacy compatibility, V2-preferred
precedence, no-group DM path, allowlist enforcement, and malformed
payloads.
---
 gateway/platforms/signal.py  |  16 +++-
 tests/gateway/test_signal.py | 159 +++++++++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+), 3 deletions(-)

diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index bd731a7ab5d..2a0aa3f80c1 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -490,9 +490,19 @@ class SignalAdapter(BasePlatformAdapter):
         if not data_message:
             return
 
-        # Check for group message
+        # Check for group message.
+        # Modern Signal groups surface on dataMessage.groupV2.id; legacy V1
+        # groups still arrive under dataMessage.groupInfo.groupId. signal-cli
+        # versions differ in which field they expose for V2 groups — some
+        # forward the underlying libsignal envelope verbatim (groupV2), others
+        # normalize everything into groupInfo. Read groupV2 first and fall
+        # back to groupInfo so V2-only groups aren't misrouted as DMs.
         group_info = data_message.get("groupInfo")
-        group_id = group_info.get("groupId") if group_info else None
+        group_v2 = data_message.get("groupV2")
+        group_id = (
+            (group_v2.get("id") if isinstance(group_v2, dict) else None)
+            or (group_info.get("groupId") if isinstance(group_info, dict) else None)
+        )
         is_group = bool(group_id)
 
         # Group message filtering — derived from SIGNAL_GROUP_ALLOWED_USERS:
@@ -562,7 +572,7 @@ class SignalAdapter(BasePlatformAdapter):
         # Build session source
         source = self.build_source(
             chat_id=chat_id,
-            chat_name=group_info.get("groupName") if group_info else sender_name,
+            chat_name=(group_info.get("groupName") if isinstance(group_info, dict) else None) or sender_name,
             chat_type=chat_type,
             user_id=sender,
             user_name=sender_name or sender,
diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py
index af81f59e8cd..7f34698f027 100644
--- a/tests/gateway/test_signal.py
+++ b/tests/gateway/test_signal.py
@@ -1794,3 +1794,162 @@ class TestSignalContentlessEnvelope:
 
         assert "event" in captured, "Normal message should NOT be skipped"
         assert captured["event"].text == "hello world"
+
+
+# ---------------------------------------------------------------------------
+# Envelope handling — group routing (legacy groupInfo vs modern groupV2)
+# ---------------------------------------------------------------------------
+
+class TestSignalGroupV2Routing:
+    """Regression coverage for groupV2 envelope handling.
+
+    signal-cli's JSON-RPC ``subscribeReceive`` envelope shape has drifted across
+    versions: some forward the underlying libsignal V2 envelope as
+    ``dataMessage.groupV2.id`` while older / normalized paths still use
+    ``dataMessage.groupInfo.groupId``. The adapter must read groupV2 first and
+    fall back to groupInfo so V2-only groups aren't misrouted as DMs.
+
+    Ported from qwibitai/nanoclaw#1962 (V2 adapter improvements).
+    """
+
+    def _base_envelope(self, data_message: dict) -> dict:
+        return {
+            "envelope": {
+                "sourceNumber": "+15559998888",
+                "sourceUuid": "uuid-sender",
+                "sourceName": "Alice",
+                "timestamp": 1700000000000,
+                "dataMessage": data_message,
+            }
+        }
+
+    @pytest.mark.asyncio
+    async def test_group_v2_id_routes_as_group(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch, group_allowed="*")
+        captured = []
+
+        async def _capture(event):
+            captured.append(event)
+
+        adapter.handle_message = _capture
+
+        env = self._base_envelope({
+            "message": "hello v2",
+            "groupV2": {"id": "v2group=="},
+        })
+
+        await adapter._handle_envelope(env)
+
+        assert len(captured) == 1
+        assert captured[0].source.chat_id == "group:v2group=="
+        assert captured[0].source.chat_type == "group"
+        assert captured[0].text == "hello v2"
+
+    @pytest.mark.asyncio
+    async def test_legacy_group_info_still_works(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch, group_allowed="*")
+        captured = []
+
+        async def _capture(event):
+            captured.append(event)
+
+        adapter.handle_message = _capture
+
+        env = self._base_envelope({
+            "message": "hello v1",
+            "groupInfo": {"groupId": "legacy=="},
+        })
+
+        await adapter._handle_envelope(env)
+
+        assert len(captured) == 1
+        assert captured[0].source.chat_id == "group:legacy=="
+        assert captured[0].source.chat_type == "group"
+
+    @pytest.mark.asyncio
+    async def test_group_v2_preferred_over_group_info(self, monkeypatch):
+        """When both fields are present, groupV2 wins — it's the authoritative V2 id."""
+        adapter = _make_signal_adapter(monkeypatch, group_allowed="*")
+        captured = []
+
+        async def _capture(event):
+            captured.append(event)
+
+        adapter.handle_message = _capture
+
+        env = self._base_envelope({
+            "message": "hello",
+            "groupV2": {"id": "v2=="},
+            "groupInfo": {"groupId": "v1=="},
+        })
+
+        await adapter._handle_envelope(env)
+
+        assert len(captured) == 1
+        assert captured[0].source.chat_id == "group:v2=="
+
+    @pytest.mark.asyncio
+    async def test_no_group_fields_routes_as_dm(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch)
+        captured = []
+
+        async def _capture(event):
+            captured.append(event)
+
+        adapter.handle_message = _capture
+
+        env = self._base_envelope({"message": "direct message"})
+
+        await adapter._handle_envelope(env)
+
+        assert len(captured) == 1
+        assert captured[0].source.chat_type == "dm"
+        assert captured[0].source.chat_id == "+15559998888"
+
+    @pytest.mark.asyncio
+    async def test_group_v2_respects_allowlist(self, monkeypatch):
+        """V2 group ids flow through the same SIGNAL_GROUP_ALLOWED_USERS filter."""
+        adapter = _make_signal_adapter(monkeypatch, group_allowed="allowed-v2==")
+        captured = []
+
+        async def _capture(event):
+            captured.append(event)
+
+        adapter.handle_message = _capture
+
+        # Blocked group (not in allowlist)
+        await adapter._handle_envelope(self._base_envelope({
+            "message": "blocked",
+            "groupV2": {"id": "blocked-v2=="},
+        }))
+        assert len(captured) == 0
+
+        # Allowed group
+        await adapter._handle_envelope(self._base_envelope({
+            "message": "allowed",
+            "groupV2": {"id": "allowed-v2=="},
+        }))
+        assert len(captured) == 1
+        assert captured[0].source.chat_id == "group:allowed-v2=="
+
+    @pytest.mark.asyncio
+    async def test_malformed_group_fields_fall_through_to_dm(self, monkeypatch):
+        """Non-dict groupV2 / groupInfo shouldn't crash — treat as DM."""
+        adapter = _make_signal_adapter(monkeypatch)
+        captured = []
+
+        async def _capture(event):
+            captured.append(event)
+
+        adapter.handle_message = _capture
+
+        env = self._base_envelope({
+            "message": "malformed",
+            "groupV2": "not-a-dict",
+            "groupInfo": 42,
+        })
+
+        await adapter._handle_envelope(env)
+
+        assert len(captured) == 1
+        assert captured[0].source.chat_type == "dm"

From 8a2b2b9f6f9c419fdef48f542bf4b1991c655810 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 11:57:59 -0700
Subject: [PATCH 137/218] docs(release): expand v0.14.0 highlights with
 newcomer-friendly context (#27053)

Each highlight now gets 2-3 sentences explaining the user-facing value,
not just the technical change. Targeted at someone discovering Hermes
for the first time who isn't deep in the codebase.
---
 RELEASE_v0.14.0.md | 67 ++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 35 deletions(-)

diff --git a/RELEASE_v0.14.0.md b/RELEASE_v0.14.0.md
index 9a5ee4ba8c8..30ab4189ac2 100644
--- a/RELEASE_v0.14.0.md
+++ b/RELEASE_v0.14.0.md
@@ -9,73 +9,70 @@
 
 ## ✨ Highlights
 
-- **xAI Grok via SuperGrok OAuth — and grok-4.3 jumps to a 1M context window** — Sign in with your xAI account, talk to Grok models from Hermes. The OAuth provider ships with proper entitlement-error handling, prelude-SSE recovery, and reasoning replay gating. grok-4.3's context bumps to 1M as part of the wire-through. Includes an SSH-to-tunnel docs page for remote-machine OAuth flows. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534), [#26664](https://github.com/NousResearch/hermes-agent/pull/26664), [#26644](https://github.com/NousResearch/hermes-agent/pull/26644), [#26592](https://github.com/NousResearch/hermes-agent/pull/26592))
+- **xAI Grok via SuperGrok OAuth — and grok-4.3 jumps to a 1M context window** — If you pay for SuperGrok, you can now use Grok inside Hermes by signing in with your xAI account — no API key, no separate billing. The wire-through also bumps grok-4.3 to a 1M token context window, so you can drop whole codebases or research corpora into a single prompt. Includes proper handling for entitlement errors and an SSH-to-tunnel docs page for when you're SSH'd into a remote box and need to complete the OAuth flow. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534), [#26664](https://github.com/NousResearch/hermes-agent/pull/26664), [#26644](https://github.com/NousResearch/hermes-agent/pull/26644), [#26592](https://github.com/NousResearch/hermes-agent/pull/26592))
 
-- **OpenAI-compatible local proxy for OAuth providers** — `hermes proxy` exposes any OAuth-authed Hermes provider — Claude Pro, ChatGPT Pro, SuperGrok — as an OpenAI-compatible HTTP endpoint. Point Codex / Aider / Cline / Continue at `http://localhost:port` and your subscription works in any OpenAI-compatible tool. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
+- **OpenAI-compatible local proxy for OAuth providers** — Run `hermes proxy` and you get a `http://localhost:port` endpoint that speaks the OpenAI API but is backed by whichever OAuth provider you're signed into — Claude Pro, ChatGPT Pro, SuperGrok. Now any tool that expects an OpenAI-compatible endpoint (Codex CLI, Aider, Cline, Continue, your custom scripts) just works with your existing subscription, no API key required. One subscription, every tool. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
 
-- **`x_search` — first-class X (Twitter) search tool** — gated tool with OAuth-or-API-key auth, no skill required. Search the timeline, find threads, surface posts — straight from the agent. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
+- **`x_search` — first-class X (Twitter) search tool** — The agent can now search X directly without installing a skill or wiring up a custom integration. Search the timeline, find threads, surface specific posts — straight from the chat. Auth with either your X OAuth login or an API key, whichever you have. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
 
-- **Microsoft Teams — end-to-end** — Hermes can now read and post to Teams. The full Microsoft Graph foundation lands together: auth + client foundation, webhook listener platform, Teams pipeline plugin runtime, and Teams outbound delivery via the existing adapter. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
+- **Microsoft Teams — end-to-end** — Hermes can now read messages from Teams and post back. The full Microsoft Graph stack lands together: auth + client foundation, a webhook listener that receives Teams events, a pipeline plugin runtime, and outbound delivery. Wire up the bot once, then chat to your agent from any Teams channel, DM, or group. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
 
-- **Debloating wave — lighter installs, less you don't need** — the heavy backends (Slack / Matrix / Feishu / DingTalk adapters, hindsight client, codex app-server, Pixverse / Camofox / image-gen SDKs, voice/TTS providers) now lazy-install on first use. The `[all]` extras drop everything covered by lazy-deps so a default install ships with vastly fewer wheels. A tiered install falls back through extras tiers when one rejects on the target platform. The supply-chain advisory checker scans every install/upgrade against an advisory list. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220), [#24515](https://github.com/NousResearch/hermes-agent/pull/24515), [#25014](https://github.com/NousResearch/hermes-agent/pull/25014), [#25038](https://github.com/NousResearch/hermes-agent/pull/25038), [#25766](https://github.com/NousResearch/hermes-agent/pull/25766), [#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
+- **Debloating wave — lighter installs, less you don't use** — A clean `pip install hermes-agent` used to pull down everything: every messaging adapter SDK, every image-gen SDK, every voice/TTS provider, whether you used them or not. Now those heavy backends (Slack / Matrix / Feishu / DingTalk adapters, hindsight client, codex app-server, Pixverse / Camofox / image-gen SDKs, voice/TTS providers) install automatically the first time you actually use them. The `[all]` extras drop everything covered by lazy-deps, the installer falls back through tiers when a wheel doesn't fit your platform, and a supply-chain advisory checker scans every install for unsafe versions. Faster installs, smaller disk footprint, fewer transitive vulnerabilities. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220), [#24515](https://github.com/NousResearch/hermes-agent/pull/24515), [#25014](https://github.com/NousResearch/hermes-agent/pull/25014), [#25038](https://github.com/NousResearch/hermes-agent/pull/25038), [#25766](https://github.com/NousResearch/hermes-agent/pull/25766), [#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
 
-- **`pip install hermes-agent && hermes`** — Hermes Agent is a real PyPI package now. One command, no clone, no git, no shell installer. The wheel ships with the Ink TUI bundle and the shell launcher. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593), [#26148](https://github.com/NousResearch/hermes-agent/pull/26148))
+- **`pip install hermes-agent && hermes`** — Hermes Agent is now a real PyPI package. No more cloning the repo or running shell installers — one pip command and you're running. The wheel ships with the Ink TUI bundle and the shell launcher, so the full experience comes out of the box. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593), [#26148](https://github.com/NousResearch/hermes-agent/pull/26148))
 
-- **Cross-session 1h Claude prompt cache** — Anthropic, OpenRouter, and Nous Portal now share a 1-hour prefix cache across sessions for Claude models. `/new` and `/resume` hit cache. Background memory review hits cache. The system prompt is now byte-static within a session so cache layout never invalidates mid-conversation. Lower cost, faster first token. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828), [#25434](https://github.com/NousResearch/hermes-agent/pull/25434), [#24778](https://github.com/NousResearch/hermes-agent/pull/24778))
+- **Cross-session 1h Claude prompt cache** — When you use Claude through Anthropic, OpenRouter, or Nous Portal, the prompt prefix (system prompt, skills, memory) now caches for an hour across sessions. Start a `/new` session and the first response comes back faster and cheaper because the cache is still warm from your last session. Background memory review hits the cache too, so it's not paying full price every turn. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828), [#25434](https://github.com/NousResearch/hermes-agent/pull/25434), [#24778](https://github.com/NousResearch/hermes-agent/pull/24778))
 
-- **180x faster `browser_console` evaluations** — routed through the supervisor's persistent CDP WebSocket instead of spawning a fresh DevTools session per call. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
+- **180x faster `browser_console` evaluations** — When the agent uses the browser tool to inspect a page or run JavaScript, those calls now share one persistent connection to Chrome instead of spinning up a new DevTools session every time. The difference is huge: things that used to take a couple of seconds per call return in milliseconds. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
 
-- **Cold-start performance wave — ~19s off `hermes` launch** — skills cache + lazy Feishu + no Nous HTTP at startup, plus PEP-562 lazy adapter imports (QQ, Yuanbao, Teams, Google Chat), deferred `fal_client` / `google-cloud` / `httpx` loads, models.dev disk-cache-first lookup, parallel doctor API checks, eager-skip plugin discovery on built-in subcommands. `hermes tools` All-Platforms drops from 14s to <1.5s. Welcome banner skipped on `chat -q`. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
+- **Cold-start performance wave — ~19 seconds off `hermes` launch** — Running `hermes` used to make you wait through a chunk of import overhead and network calls before you saw a prompt. Now the launch path is mostly deferred: heavy adapters only load when you use them, model catalogs come from disk cache first, doctor checks run in parallel, and `chat -q` skips the welcome banner entirely. The `hermes tools` All-Platforms screen alone dropped from 14 seconds to under 1.5 seconds. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
 
-- **Two new messaging platforms — LINE + SimpleX Chat** — LINE Messaging API as a first-class platform, SimpleX Chat salvages #2558 onto the modern adapter spec. Hermes is now on 22 platforms. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
+- **Two new messaging platforms — LINE + SimpleX Chat** — LINE is huge in Japan, Korea, and Taiwan, and now Hermes runs natively on the LINE Messaging API. SimpleX Chat is the privacy-focused decentralized messenger with no user IDs — also wired up as a first-class platform. That brings Hermes to 22 messaging platforms total, so wherever you and your team chat, the agent can be there. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
 
-- **`/handoff` actually transfers the session live** — the agent's active session moves to a different model / persona / profile mid-conversation, with messages, tool history, and context preserved. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
+- **`/handoff` actually transfers the session live** — Switching models or personalities mid-conversation used to mean losing context or starting over. Now `/handoff` moves your active session — every message, every tool call, every piece of context — to the target model, persona, or profile, live, without dropping anything. Mid-debugging hand off from a fast model to a deep-reasoning one, or pass a session between profiles for different parts of a task. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
 
-- **Native button UI for `clarify` on Telegram and Discord** — the `clarify` tool renders multi-choice prompts as platform-native inline keyboards on Telegram and buttons on Discord instead of typed responses. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
+- **Native button UI for `clarify` on Telegram and Discord** — When the agent uses the `clarify` tool to ask you a multiple-choice question, it now shows real platform-native buttons on Telegram and Discord instead of asking you to type back the option number. Tap the button, the agent gets your answer. Especially nice on mobile. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
 
-- **Discord channel history backfill (default on)** — Hermes reads recent channel history when joining a thread so it actually knows what's already been said before responding. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
+- **Discord channel history backfill (default on)** — When Hermes joins a Discord channel or thread for the first time, it now reads the recent message history so it knows what's been said before it responds. No more "what are we talking about?" — the agent has the context that's already on screen for everyone else. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
 
-- **`vision_analyze` returns pixels to vision-capable models** — when the active model can see, `vision_analyze` now hands the image straight through instead of falling back to a text description. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
+- **`vision_analyze` returns pixels to vision-capable models** — When you point the agent at an image with `vision_analyze` and the active model can actually see (GPT-5, Claude, Gemini, Grok-vision), Hermes now passes the raw pixels straight to the model instead of converting them to a text description first. You get the model's actual visual reasoning instead of a degraded text-summary round-trip. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
 
-- **Per-turn file-mutation verifier footer** — after every turn that wrote files, the agent gets a verifier footer summarizing what actually changed on disk — catches silent overwrites and "wrote it but it didn't land" bugs. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
+- **Per-turn file-mutation verifier footer** — After every turn that wrote or edited files, the agent now gets a short footer summarizing exactly what changed on disk — the file paths, the line counts, the actual delta. That means the agent catches its own mistakes when a write didn't land or got silently overwritten, instead of confidently telling you "I added the function" when the file wasn't actually saved. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
 
-- **LSP semantic diagnostics on every write** — `write_file` and `patch` run real language-server diagnostics on the post-edit file (delta-only) and surface real errors before they ship downstream. Goes well beyond the in-proc Python/JSON/YAML/TOML linters from v0.13.0. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
+- **LSP semantic diagnostics on every write** — When the agent uses `write_file` or `patch`, Hermes now runs a real language server against the edited file and surfaces any new errors back to the agent before the next turn. Type errors, undefined symbols, missing imports — caught immediately. Goes way beyond v0.13.0's basic Python/JSON/YAML/TOML linting because it's actual semantic analysis. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
 
-- **Unified `video_generate` with pluggable provider backends** — single tool, any backend. Drop in a new video provider as a plugin, no core changes. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
+- **Unified `video_generate` with pluggable provider backends** — One tool, any video model. Hermes ships with the obvious backends already, but you can drop in a new video provider as a plugin without touching core. So when a new video model lands next month, it can be a one-file plugin instead of a fork. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
 
-- **`computer_use` cua-driver backend — works with non-Anthropic models now** — proper focus-safe ops, non-Anthropic provider support, refresh on `hermes update`. Computer-use is no longer locked to a single SDK. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
+- **`computer_use` cua-driver backend — works with non-Anthropic models now** — Computer-use (the agent controlling your mouse and keyboard to drive GUI apps) used to be locked to Anthropic's SDK. The new cua-driver backend works with non-Anthropic providers too, has proper focus-safe operations, and refreshes itself on `hermes update`. Now any vision-capable model can drive your desktop. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
 
-- **Clickable URLs in any terminal** — links in agent output now render as proper OSC8 hyperlinks with hover-highlight, in any terminal that supports them. Click to open. (@OutThisLife) ([#25071](https://github.com/NousResearch/hermes-agent/pull/25071), [#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
+- **Clickable URLs in any terminal** — Links in agent output are now real OSC8 hyperlinks with hover-highlight in any terminal that supports them. Click to open in your browser — no more copy-paste-trim of long URLs from the transcript. Just works in iTerm2, Kitty, Ghostty, modern Windows Terminal, etc. (@OutThisLife) ([#25071](https://github.com/NousResearch/hermes-agent/pull/25071), [#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
 
-- **Zed ACP Registry — `uvx` install in one click** — Hermes is in the Zed registry, installable via `uvx` (no npm needed). Plus `hermes acp --setup-browser` bootstraps browser tools for registry installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
+- **Zed ACP Registry — `uvx` install in one click** — Hermes is now listed in Zed's Agent Client Protocol registry, so Zed users can install it with one click. The install path uses `uvx` so there's no npm dependency. `hermes acp --setup-browser` bootstraps the browser tools for registry-driven installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
 
-- **OpenRouter Pareto Code router with `min_coding_score` knob** — pick the cheapest model that meets your quality bar. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
+- **OpenRouter Pareto Code router with `min_coding_score` knob** — OpenRouter's "Pareto" router automatically picks the cheapest model that meets a minimum quality bar. The new `min_coding_score` config lets you set that bar for coding tasks specifically — Hermes routes to the most affordable model that's at least that good at code. Stop paying for top-tier models when a mid-tier one would do. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
 
-- **NovitaAI as a new model provider** (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
+- **NovitaAI as a new model provider** — NovitaAI joins the provider lineup, giving you another option for open-source model hosting (Llama, Qwen, DeepSeek, etc.) with their pricing and rate limits. (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
 
-- **Codex app-server runtime for OpenAI/Codex models** — optional runtime that drives OpenAI's Codex CLI under the hood for OpenAI/Codex paths, with session reuse, wedge retirement, and OAuth refresh classification. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
+- **Codex app-server runtime for OpenAI/Codex models** — An optional runtime that drives OpenAI's Codex CLI under the hood when you're using OpenAI or Codex paths. You get session reuse, automatic retirement of wedged sessions, and proper OAuth refresh classification — the kind of plumbing that makes long agentic runs not fall over. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
 
-- **`huggingface/skills` as a trusted default tap** — community skills index from huggingface.co/skills is available by default in the Skills Hub. (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
+- **`huggingface/skills` as a trusted default tap** — The community skills index hosted at huggingface.co/skills is now wired into the Skills Hub by default. So when somebody publishes a useful skill there, you can install it from your own `hermes skills` browser without any extra config. (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
 
-- **9 new optional skills** — Hyperliquid (perp/spot trading via SDK + REST), Yahoo Finance market data, api-testing (REST/GraphQL debug), unified EVM multi-chain (folds #25291 + #2010 + base/), darwinian-evolver, osint-investigation (closes #355), pinggy-tunnel, watchers (RSS/HTTP/GitHub via cron `no_agent`), Notion overhaul for the May 2026 Developer Platform. ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
+- **9 new optional skills** — Hyperliquid (perp + spot trading via the SDK and REST API), Yahoo Finance (live market data, fundamentals, historicals), api-testing (REST + GraphQL debug recipes), unified EVM multi-chain (one skill covers Ethereum + L2s + Base), darwinian-evolver (evolutionary prompt/skill tuning), osint-investigation (OSINT recipes for people / domains / orgs), pinggy-tunnel (expose local services to the public internet), watchers (polls RSS / HTTP JSON / GitHub via cron `no_agent` mode for change detection), and a full Notion overhaul for the May 2026 Developer Platform. ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
 
-- **API server exposes run approval events** — long-running runs surface approval requests over the API stream, no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))
+- **API server exposes run approval events** — If you're driving Hermes programmatically through the HTTP API, long-running runs no longer silently hang when the agent hits an approval-required command. The approval request now surfaces on the API stream so your client can prompt the user and reply — no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))
 
-- **Plugins can run any LLM call via `ctx.llm` + `tool_override`** — plugins get a first-class hook to make their own LLM requests through the active provider/credentials, plus a `tool_override` flag for replacing built-in tools (closes #11049). ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
+- **Plugins can run any LLM call via `ctx.llm` + replace built-in tools via `tool_override`** — If you're writing a Hermes plugin, you now get first-class access to make LLM calls through the active provider and credentials — no manual client wiring. The new `tool_override` flag lets a plugin swap out a built-in tool with its own implementation cleanly. Plugin authors get the same model-routing and auth plumbing the core agent uses. (closes #11049) ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
 
-- **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — two new free search backends alongside Tavily / SearXNG / Exa. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
+- **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — Two new free web-search backends join Tavily, SearXNG, and Exa. Brave Search has a generous free tier; DDGS is the DuckDuckGo scraper that needs no key at all. Pick whichever fits your budget and rate-limit needs. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
 
-- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS classification** — closes the `sudo -S` brute-force avenue; approval gates classify stdin-fed and askpass-stripped sudo invocations as dangerous. (salvages of #22194 + #21128) (@kshitijk4poor) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
+- **Sudo brute-force block + 3 dangerous-command bypasses closed + tool-error sanitization** — The approval gate now blocks `sudo -S` brute-force attempts and classifies stdin-fed or askpass-stripped sudo invocations as DANGEROUS. Three known bypasses of dangerous-command detection are closed (inspired by Claude Code's command-detection work). And tool error strings are now sanitized before being re-injected into the model context, so a malicious file or remote service can't pass instructions to your agent through error output. ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736), [#26829](https://github.com/NousResearch/hermes-agent/pull/26829), [#26823](https://github.com/NousResearch/hermes-agent/pull/26823))
 
-- **Closes 3 dangerous-command detection bypasses + sanitizes tool error strings before injection** — inspired by Claude Code's command-detection work; closes 3 known bypasses and adds tool-error sanitization to stop adversarial output from re-entering the model context. ([#26829](https://github.com/NousResearch/hermes-agent/pull/26829), [#26823](https://github.com/NousResearch/hermes-agent/pull/26823))
+- **`/subgoal` — user-added criteria appended to an active `/goal`** — When you've got a `/goal` running (the persistent Ralph-loop goal where the agent keeps going until criteria are met), you can now use `/subgoal <text>` to layer extra success criteria onto it mid-run. The judge factors your new criteria into the done-or-keep-going decision without restarting the loop. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
 
-- **`/subgoal` — user-added criteria appended to active `/goal`** — layer extra success criteria onto a running goal loop. The judge sees them in the prompt, no behavior change when subgoals are empty. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
+- **Provider rename — Alibaba Cloud → Qwen Cloud** — The Alibaba Cloud provider is renamed to Qwen Cloud in the picker and config to match what the rest of the world calls it. Existing config keys still work — no breaking changes — but the UI matches the actual brand now. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
 
-- **Provider rename — Alibaba Cloud → Qwen Cloud, picker reorder** — matches what the world calls it. Existing config keys still work. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
+- **Native Windows support (early beta)** — Hermes now runs natively on `cmd.exe` and PowerShell without WSL. A full PowerShell installer handles MinGit auto-install, Microsoft Store python stub detection, and the foreground Ctrl+C dance. There's still rough edges (this is the "early beta" stamp) — ~40 follow-up Windows-only fixes already landed in the window — but the basic loop works end-to-end on a clean Windows box. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
 
-- **Native Windows support (early beta)** — Hermes runs natively on `cmd.exe` and PowerShell. Full PowerShell installer, native subprocess/PTY paths, taskkill-based process management, MinGit auto-install, foreground Ctrl+C preservation, ~40 follow-up Windows-only fixes. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
-
----
 
 ---
 

From 05af78c53d553f6dd20012ce18eb0c2c02d346c9 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 12:15:45 -0700
Subject: [PATCH 138/218] fix(update): make Camofox lazy-installed instead of
 eager (#27055)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `@askjo/camofox-browser` npm package was a top-level entry in
the root `package.json` `dependencies` block, so `hermes update`
ran its postinstall on every user, every update. That postinstall
calls `npx camoufox-js fetch`, which silently downloads a ~300MB
Firefox-fork browser binary from GitHub Releases — multi-minute on
fast connections, and a hard block for users on slow / restricted
networks (notably users in China running through a VPN).

Camofox is an explicit opt-in browser backend. The runtime check
in `tools/browser_tool.py` only routes through Camofox when the
user has set `CAMOFOX_URL` (selected via `hermes tools` →
Browser Automation → Camofox). Users who never opted in never
touched the package at runtime, yet every `hermes update` paid
for the binary fetch anyway.

This change:

* Removes `@askjo/camofox-browser` from root `package.json`
  dependencies (and the regenerated `package-lock.json` drops
  Camofox's entire transitive tree, ~2.6k lines).
* Updates the Camofox `post_setup` handler in
  `hermes_cli/tools_config.py` to install
  `@askjo/camofox-browser@^1.5.2` explicitly when the user
  selects Camofox, and streams npm output (no `--silent`, no
  `capture_output`) so the ~300MB download is visible rather
  than appearing frozen.
* Adds `tests/test_package_json_lazy_deps.py` as a regression
  guard so future PRs can't silently re-add Camofox (or any
  binary-postinstall package) to eager root dependencies.

`agent-browser` stays eager — it is the default Chromium-driving
backend used by every session that does not have a cloud browser
provider configured, and its postinstall is small.

Validation:

| | Before | After |
|---|---|---|
| `hermes update` time on slow network | multi-minute hang at `→ Updating Node.js dependencies...` | seconds (no binary fetch) |
| Camofox opt-in install visibility | silent, looked frozen | streamed npm output |
| Regression guard against re-adding | none | `test_package_json_lazy_deps.py` |

Tests:
- `tests/test_package_json_lazy_deps.py`: 3/3 pass
- `tests/tools/test_browser_camofox*`: 92/92 pass
- `tests/hermes_cli/test_tools_config.py`: 66/66 pass
- `tests/hermes_cli/test_cmd_update.py` + adjacent: green

Reported by lulu (Discord, May 2026) — `hermes update` hangs at
`→ Updating Node.js dependencies...` in China.
Related: #18840, #18869.
---
 hermes_cli/tools_config.py           |   36 +-
 package-lock.json                    | 2630 --------------------------
 package.json                         |    1 -
 tests/test_package_json_lazy_deps.py |   85 +
 4 files changed, 110 insertions(+), 2642 deletions(-)
 create mode 100644 tests/test_package_json_lazy_deps.py

diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 074bd04aa64..3afaa5cc7c9 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -810,21 +810,35 @@ def _run_post_setup(post_setup_key: str):
         camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camofox-browser"
         _npm_bin = shutil.which("npm")
         if not camofox_dir.exists() and _npm_bin:
-            _print_info("    Installing Camofox browser server...")
+            _print_info("    Installing Camofox browser package...")
+            _print_info("    First run downloads the Camoufox engine (~300MB) — this can take several minutes.")
             import subprocess
-            # Absolute npm path so .cmd shim executes on Windows.
-            result = subprocess.run(
-                [_npm_bin, "install", "--silent"],
-                capture_output=True, text=True, cwd=str(PROJECT_ROOT)
-            )
-            if result.returncode == 0:
-                _print_success("    Camofox installed")
-            else:
-                _print_warning("    npm install failed - run manually: npm install")
+            # Install @askjo/camofox-browser on-demand. It is NOT in
+            # package.json so that `hermes update` does not silently pull
+            # the ~300MB Camoufox Firefox-fork binary for every user.
+            # Stream output (no capture, no --silent) so the long-running
+            # postinstall download is visible instead of looking frozen.
+            try:
+                result = subprocess.run(
+                    [_npm_bin, "install", "@askjo/camofox-browser@^1.5.2",
+                     "--no-fund", "--no-audit", "--progress=false"],
+                    cwd=str(PROJECT_ROOT),
+                )
+                if result.returncode == 0:
+                    _print_success("    Camofox installed")
+                else:
+                    _print_warning(
+                        "    npm install failed — run manually: "
+                        "npm install @askjo/camofox-browser"
+                    )
+            except Exception as exc:
+                _print_warning(f"    Camofox install failed: {exc}")
+                _print_info(
+                    "    Run manually: npm install @askjo/camofox-browser"
+                )
         if camofox_dir.exists():
             _print_info("    Start the Camofox server:")
             _print_info("      npx @askjo/camofox-browser")
-            _print_info("    First run downloads the Camoufox engine (~300MB)")
             _print_info("    Or use Docker: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
         elif not shutil.which("npm"):
             _print_warning("    Node.js not found. Install Camofox via Docker:")
diff --git a/package-lock.json b/package-lock.json
index 8309e3b7a96..055fb0c9b50 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,90 +10,12 @@
       "hasInstallScript": true,
       "license": "MIT",
       "dependencies": {
-        "@askjo/camofox-browser": "^1.5.2",
         "agent-browser": "^0.26.0"
       },
       "engines": {
         "node": ">=20.0.0"
       }
     },
-    "node_modules/@askjo/camofox-browser": {
-      "version": "1.5.2",
-      "resolved": "https://registry.npmjs.org/@askjo/camofox-browser/-/camofox-browser-1.5.2.tgz",
-      "integrity": "sha512-SvRCzhWnJaplxHkRVF9l1OWako6pp2eUw2mZKHOERUfLWDO2Xe/IKI+5bB+UT1TNvO45P6XdhgfAtihcTEARCg==",
-      "hasInstallScript": true,
-      "license": "MIT",
-      "dependencies": {
-        "camoufox-js": "^0.8.5",
-        "express": "^4.18.2",
-        "playwright": "^1.50.0",
-        "playwright-core": "^1.58.0",
-        "playwright-extra": "^4.3.6",
-        "prom-client": "^15.1.3",
-        "puppeteer-extra-plugin-stealth": "^2.11.2"
-      },
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@opentelemetry/api": {
-      "version": "1.9.1",
-      "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.1.tgz",
-      "integrity": "sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==",
-      "license": "Apache-2.0",
-      "engines": {
-        "node": ">=8.0.0"
-      }
-    },
-    "node_modules/@sindresorhus/is": {
-      "version": "4.6.0",
-      "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-4.6.0.tgz",
-      "integrity": "sha512-t09vSN3MdfsyCHoFcTRCH/iUtG7OJ0CsjzB8cjAmKc/va/kIgeDI/TxsigdncE/4be734m0cvIYwNaV4i2XqAw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sindresorhus/is?sponsor=1"
-      }
-    },
-    "node_modules/@types/debug": {
-      "version": "4.1.13",
-      "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz",
-      "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/ms": "*"
-      }
-    },
-    "node_modules/@types/ms": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz",
-      "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==",
-      "license": "MIT"
-    },
-    "node_modules/accepts": {
-      "version": "1.3.8",
-      "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
-      "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==",
-      "license": "MIT",
-      "dependencies": {
-        "mime-types": "~2.1.34",
-        "negotiator": "0.6.3"
-      },
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/adm-zip": {
-      "version": "0.5.17",
-      "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.17.tgz",
-      "integrity": "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=12.0"
-      }
-    },
     "node_modules/agent-browser": {
       "version": "0.26.0",
       "resolved": "https://registry.npmjs.org/agent-browser/-/agent-browser-0.26.0.tgz",
@@ -103,2558 +25,6 @@
       "bin": {
         "agent-browser": "bin/agent-browser.js"
       }
-    },
-    "node_modules/arr-union": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz",
-      "integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/array-flatten": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
-      "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==",
-      "license": "MIT"
-    },
-    "node_modules/balanced-match": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
-      "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
-      "license": "MIT",
-      "engines": {
-        "node": "18 || 20 || >=22"
-      }
-    },
-    "node_modules/base64-js": {
-      "version": "1.5.1",
-      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
-      "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "license": "MIT"
-    },
-    "node_modules/baseline-browser-mapping": {
-      "version": "2.10.18",
-      "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.18.tgz",
-      "integrity": "sha512-VSnGQAOLtP5mib/DPyg2/t+Tlv65NTBz83BJBJvmLVHHuKJVaDOBvJJykiT5TR++em5nfAySPccDZDa4oSrn8A==",
-      "license": "Apache-2.0",
-      "bin": {
-        "baseline-browser-mapping": "dist/cli.cjs"
-      },
-      "engines": {
-        "node": ">=6.0.0"
-      }
-    },
-    "node_modules/better-sqlite3": {
-      "version": "12.9.0",
-      "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-12.9.0.tgz",
-      "integrity": "sha512-wqUv4Gm3toFpHDQmaKD4QhZm3g1DjUBI0yzS4UBl6lElUmXFYdTQmmEDpAFa5o8FiFiymURypEnfVHzILKaxqQ==",
-      "hasInstallScript": true,
-      "license": "MIT",
-      "dependencies": {
-        "bindings": "^1.5.0",
-        "prebuild-install": "^7.1.1"
-      },
-      "engines": {
-        "node": "20.x || 22.x || 23.x || 24.x || 25.x"
-      }
-    },
-    "node_modules/bindings": {
-      "version": "1.5.0",
-      "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
-      "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
-      "license": "MIT",
-      "dependencies": {
-        "file-uri-to-path": "1.0.0"
-      }
-    },
-    "node_modules/bintrees": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.2.tgz",
-      "integrity": "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==",
-      "license": "MIT"
-    },
-    "node_modules/bl": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
-      "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
-      "license": "MIT",
-      "dependencies": {
-        "buffer": "^5.5.0",
-        "inherits": "^2.0.4",
-        "readable-stream": "^3.4.0"
-      }
-    },
-    "node_modules/body-parser": {
-      "version": "1.20.4",
-      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.4.tgz",
-      "integrity": "sha512-ZTgYYLMOXY9qKU/57FAo8F+HA2dGX7bqGc71txDRC1rS4frdFI5R7NhluHxH6M0YItAP0sHB4uqAOcYKxO6uGA==",
-      "license": "MIT",
-      "dependencies": {
-        "bytes": "~3.1.2",
-        "content-type": "~1.0.5",
-        "debug": "2.6.9",
-        "depd": "2.0.0",
-        "destroy": "~1.2.0",
-        "http-errors": "~2.0.1",
-        "iconv-lite": "~0.4.24",
-        "on-finished": "~2.4.1",
-        "qs": "~6.14.0",
-        "raw-body": "~2.5.3",
-        "type-is": "~1.6.18",
-        "unpipe": "~1.0.0"
-      },
-      "engines": {
-        "node": ">= 0.8",
-        "npm": "1.2.8000 || >= 1.4.16"
-      }
-    },
-    "node_modules/brace-expansion": {
-      "version": "5.0.5",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
-      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^4.0.2"
-      },
-      "engines": {
-        "node": "18 || 20 || >=22"
-      }
-    },
-    "node_modules/browserslist": {
-      "version": "4.28.2",
-      "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz",
-      "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==",
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/browserslist"
-        },
-        {
-          "type": "tidelift",
-          "url": "https://tidelift.com/funding/github/npm/browserslist"
-        },
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/ai"
-        }
-      ],
-      "license": "MIT",
-      "dependencies": {
-        "baseline-browser-mapping": "^2.10.12",
-        "caniuse-lite": "^1.0.30001782",
-        "electron-to-chromium": "^1.5.328",
-        "node-releases": "^2.0.36",
-        "update-browserslist-db": "^1.2.3"
-      },
-      "bin": {
-        "browserslist": "cli.js"
-      },
-      "engines": {
-        "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
-      }
-    },
-    "node_modules/buffer": {
-      "version": "5.7.1",
-      "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
-      "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "license": "MIT",
-      "dependencies": {
-        "base64-js": "^1.3.1",
-        "ieee754": "^1.1.13"
-      }
-    },
-    "node_modules/bytes": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
-      "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/call-bind-apply-helpers": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
-      "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
-      "license": "MIT",
-      "dependencies": {
-        "es-errors": "^1.3.0",
-        "function-bind": "^1.1.2"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/call-bound": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz",
-      "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==",
-      "license": "MIT",
-      "dependencies": {
-        "call-bind-apply-helpers": "^1.0.2",
-        "get-intrinsic": "^1.3.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/callsites": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
-      "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/camoufox-js": {
-      "version": "0.8.5",
-      "resolved": "https://registry.npmjs.org/camoufox-js/-/camoufox-js-0.8.5.tgz",
-      "integrity": "sha512-20ihPbspAcOVSUTX9Drxxp0C116DON1n8OVA1eUDglWZiHwiHwFVFOMrIEBwAHMZpU11mIEH/kawJtstRIrDPA==",
-      "license": "MPL-2.0",
-      "dependencies": {
-        "adm-zip": "^0.5.16",
-        "better-sqlite3": "^12.2.0",
-        "commander": "^14.0.0",
-        "fingerprint-generator": "^2.1.66",
-        "glob": "^13.0.0",
-        "impit": "^0.7.0",
-        "language-tags": "^2.0.1",
-        "maxmind": "^5.0.0",
-        "progress": "^2.0.3",
-        "ua-parser-js": "^2.0.2",
-        "xml2js": "^0.6.2"
-      },
-      "bin": {
-        "camoufox-js": "dist/__main__.js"
-      },
-      "engines": {
-        "node": ">= 20"
-      },
-      "peerDependencies": {
-        "playwright-core": "*"
-      }
-    },
-    "node_modules/caniuse-lite": {
-      "version": "1.0.30001787",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001787.tgz",
-      "integrity": "sha512-mNcrMN9KeI68u7muanUpEejSLghOKlVhRqS/Za2IeyGllJ9I9otGpR9g3nsw7n4W378TE/LyIteA0+/FOZm4Kg==",
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/browserslist"
-        },
-        {
-          "type": "tidelift",
-          "url": "https://tidelift.com/funding/github/npm/caniuse-lite"
-        },
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/ai"
-        }
-      ],
-      "license": "CC-BY-4.0"
-    },
-    "node_modules/chownr": {
-      "version": "1.1.4",
-      "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
-      "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==",
-      "license": "ISC"
-    },
-    "node_modules/clone-deep": {
-      "version": "0.2.4",
-      "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz",
-      "integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==",
-      "license": "MIT",
-      "dependencies": {
-        "for-own": "^0.1.3",
-        "is-plain-object": "^2.0.1",
-        "kind-of": "^3.0.2",
-        "lazy-cache": "^1.0.3",
-        "shallow-clone": "^0.1.2"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/commander": {
-      "version": "14.0.3",
-      "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz",
-      "integrity": "sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=20"
-      }
-    },
-    "node_modules/concat-map": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
-      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
-      "license": "MIT"
-    },
-    "node_modules/content-disposition": {
-      "version": "0.5.4",
-      "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
-      "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==",
-      "license": "MIT",
-      "dependencies": {
-        "safe-buffer": "5.2.1"
-      },
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/content-type": {
-      "version": "1.0.5",
-      "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz",
-      "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/cookie": {
-      "version": "0.7.2",
-      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz",
-      "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/cookie-signature": {
-      "version": "1.0.7",
-      "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.7.tgz",
-      "integrity": "sha512-NXdYc3dLr47pBkpUCHtKSwIOQXLVn8dZEuywboCOJY/osA0wFSLlSawr3KN8qXJEyX66FcONTH8EIlVuK0yyFA==",
-      "license": "MIT"
-    },
-    "node_modules/debug": {
-      "version": "2.6.9",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
-      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
-      "license": "MIT",
-      "dependencies": {
-        "ms": "2.0.0"
-      }
-    },
-    "node_modules/decompress-response": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
-      "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
-      "license": "MIT",
-      "dependencies": {
-        "mimic-response": "^3.1.0"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/deep-extend": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
-      "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=4.0.0"
-      }
-    },
-    "node_modules/deepmerge": {
-      "version": "4.3.1",
-      "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
-      "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/depd": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
-      "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/destroy": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz",
-      "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8",
-        "npm": "1.2.8000 || >= 1.4.16"
-      }
-    },
-    "node_modules/detect-europe-js": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/detect-europe-js/-/detect-europe-js-0.1.2.tgz",
-      "integrity": "sha512-lgdERlL3u0aUdHocoouzT10d9I89VVhk0qNRmll7mXdGfJT1/wqZ2ZLA4oJAjeACPY5fT1wsbq2AT+GkuInsow==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/faisalman"
-        },
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/ua-parser-js"
-        },
-        {
-          "type": "paypal",
-          "url": "https://paypal.me/faisalman"
-        }
-      ],
-      "license": "MIT"
-    },
-    "node_modules/detect-libc": {
-      "version": "2.1.2",
-      "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
-      "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
-      "license": "Apache-2.0",
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/dot-prop": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-6.0.1.tgz",
-      "integrity": "sha512-tE7ztYzXHIeyvc7N+hR3oi7FIbf/NIjVP9hmAt3yMXzrQ072/fpjGLx2GxNxGxUl5V73MEqYzioOMoVhGMJ5cA==",
-      "license": "MIT",
-      "dependencies": {
-        "is-obj": "^2.0.0"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/dunder-proto": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
-      "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
-      "license": "MIT",
-      "dependencies": {
-        "call-bind-apply-helpers": "^1.0.1",
-        "es-errors": "^1.3.0",
-        "gopd": "^1.2.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/ee-first": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
-      "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==",
-      "license": "MIT"
-    },
-    "node_modules/electron-to-chromium": {
-      "version": "1.5.335",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.335.tgz",
-      "integrity": "sha512-q9n5T4BR4Xwa2cwbrwcsDJtHD/enpQ5S1xF1IAtdqf5AAgqDFmR/aakqH3ChFdqd/QXJhS3rnnXFtexU7rax6Q==",
-      "license": "ISC"
-    },
-    "node_modules/encodeurl": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
-      "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/end-of-stream": {
-      "version": "1.4.5",
-      "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
-      "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
-      "license": "MIT",
-      "dependencies": {
-        "once": "^1.4.0"
-      }
-    },
-    "node_modules/es-define-property": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
-      "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/es-errors": {
-      "version": "1.3.0",
-      "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
-      "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/es-object-atoms": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
-      "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
-      "license": "MIT",
-      "dependencies": {
-        "es-errors": "^1.3.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/escalade": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
-      "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/escape-html": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
-      "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==",
-      "license": "MIT"
-    },
-    "node_modules/etag": {
-      "version": "1.8.1",
-      "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
-      "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/expand-template": {
-      "version": "2.0.3",
-      "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
-      "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
-      "license": "(MIT OR WTFPL)",
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/express": {
-      "version": "4.22.1",
-      "resolved": "https://registry.npmjs.org/express/-/express-4.22.1.tgz",
-      "integrity": "sha512-F2X8g9P1X7uCPZMA3MVf9wcTqlyNp7IhH5qPCI0izhaOIYXaW9L535tGA3qmjRzpH+bZczqq7hVKxTR4NWnu+g==",
-      "license": "MIT",
-      "dependencies": {
-        "accepts": "~1.3.8",
-        "array-flatten": "1.1.1",
-        "body-parser": "~1.20.3",
-        "content-disposition": "~0.5.4",
-        "content-type": "~1.0.4",
-        "cookie": "~0.7.1",
-        "cookie-signature": "~1.0.6",
-        "debug": "2.6.9",
-        "depd": "2.0.0",
-        "encodeurl": "~2.0.0",
-        "escape-html": "~1.0.3",
-        "etag": "~1.8.1",
-        "finalhandler": "~1.3.1",
-        "fresh": "~0.5.2",
-        "http-errors": "~2.0.0",
-        "merge-descriptors": "1.0.3",
-        "methods": "~1.1.2",
-        "on-finished": "~2.4.1",
-        "parseurl": "~1.3.3",
-        "path-to-regexp": "~0.1.12",
-        "proxy-addr": "~2.0.7",
-        "qs": "~6.14.0",
-        "range-parser": "~1.2.1",
-        "safe-buffer": "5.2.1",
-        "send": "~0.19.0",
-        "serve-static": "~1.16.2",
-        "setprototypeof": "1.2.0",
-        "statuses": "~2.0.1",
-        "type-is": "~1.6.18",
-        "utils-merge": "1.0.1",
-        "vary": "~1.1.2"
-      },
-      "engines": {
-        "node": ">= 0.10.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/express"
-      }
-    },
-    "node_modules/file-uri-to-path": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
-      "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
-      "license": "MIT"
-    },
-    "node_modules/finalhandler": {
-      "version": "1.3.2",
-      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.2.tgz",
-      "integrity": "sha512-aA4RyPcd3badbdABGDuTXCMTtOneUCAYH/gxoYRTZlIJdF0YPWuGqiAsIrhNnnqdXGswYk6dGujem4w80UJFhg==",
-      "license": "MIT",
-      "dependencies": {
-        "debug": "2.6.9",
-        "encodeurl": "~2.0.0",
-        "escape-html": "~1.0.3",
-        "on-finished": "~2.4.1",
-        "parseurl": "~1.3.3",
-        "statuses": "~2.0.2",
-        "unpipe": "~1.0.0"
-      },
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/fingerprint-generator": {
-      "version": "2.1.82",
-      "resolved": "https://registry.npmjs.org/fingerprint-generator/-/fingerprint-generator-2.1.82.tgz",
-      "integrity": "sha512-5Z/yCKW324pMyMarpIKe/QPdkrFWKNJv3ktdU+fXHri80+HAwNE6QhMvEvsMkK9Q8DeCXZlpPHV77UBa1nFb4A==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "generative-bayesian-network": "^2.1.82",
-        "header-generator": "^2.1.82",
-        "tslib": "^2.4.0"
-      },
-      "engines": {
-        "node": ">=16.0.0"
-      }
-    },
-    "node_modules/for-in": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz",
-      "integrity": "sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/for-own": {
-      "version": "0.1.5",
-      "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz",
-      "integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==",
-      "license": "MIT",
-      "dependencies": {
-        "for-in": "^1.0.1"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/forwarded": {
-      "version": "0.2.0",
-      "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
-      "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/fresh": {
-      "version": "0.5.2",
-      "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
-      "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/fs-constants": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
-      "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==",
-      "license": "MIT"
-    },
-    "node_modules/fs-extra": {
-      "version": "10.1.0",
-      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz",
-      "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
-      "license": "MIT",
-      "dependencies": {
-        "graceful-fs": "^4.2.0",
-        "jsonfile": "^6.0.1",
-        "universalify": "^2.0.0"
-      },
-      "engines": {
-        "node": ">=12"
-      }
-    },
-    "node_modules/fs.realpath": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
-      "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
-      "license": "ISC"
-    },
-    "node_modules/fsevents": {
-      "version": "2.3.2",
-      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
-      "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
-      "hasInstallScript": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
-      }
-    },
-    "node_modules/function-bind": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
-      "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
-      "license": "MIT",
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/generative-bayesian-network": {
-      "version": "2.1.82",
-      "resolved": "https://registry.npmjs.org/generative-bayesian-network/-/generative-bayesian-network-2.1.82.tgz",
-      "integrity": "sha512-DH4NrmQheoMaJErdVv2IzaqkbOYSDQZmiZTV6UPDJYRDK2EyPpIQ88XRcYdPeFrUjS1N0Jj25H3HUywoJ1dbow==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "adm-zip": "^0.5.9",
-        "tslib": "^2.4.0"
-      }
-    },
-    "node_modules/get-intrinsic": {
-      "version": "1.3.0",
-      "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
-      "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
-      "license": "MIT",
-      "dependencies": {
-        "call-bind-apply-helpers": "^1.0.2",
-        "es-define-property": "^1.0.1",
-        "es-errors": "^1.3.0",
-        "es-object-atoms": "^1.1.1",
-        "function-bind": "^1.1.2",
-        "get-proto": "^1.0.1",
-        "gopd": "^1.2.0",
-        "has-symbols": "^1.1.0",
-        "hasown": "^2.0.2",
-        "math-intrinsics": "^1.1.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/get-proto": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
-      "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
-      "license": "MIT",
-      "dependencies": {
-        "dunder-proto": "^1.0.1",
-        "es-object-atoms": "^1.0.0"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/github-from-package": {
-      "version": "0.0.0",
-      "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
-      "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==",
-      "license": "MIT"
-    },
-    "node_modules/glob": {
-      "version": "13.0.6",
-      "resolved": "https://registry.npmjs.org/glob/-/glob-13.0.6.tgz",
-      "integrity": "sha512-Wjlyrolmm8uDpm/ogGyXZXb1Z+Ca2B8NbJwqBVg0axK9GbBeoS7yGV6vjXnYdGm6X53iehEuxxbyiKp8QmN4Vw==",
-      "license": "BlueOak-1.0.0",
-      "dependencies": {
-        "minimatch": "^10.2.2",
-        "minipass": "^7.1.3",
-        "path-scurry": "^2.0.2"
-      },
-      "engines": {
-        "node": "18 || 20 || >=22"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
-    "node_modules/gopd": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
-      "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/graceful-fs": {
-      "version": "4.2.11",
-      "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
-      "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
-      "license": "ISC"
-    },
-    "node_modules/has-symbols": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
-      "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/hasown": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
-      "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
-      "license": "MIT",
-      "dependencies": {
-        "function-bind": "^1.1.2"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/header-generator": {
-      "version": "2.1.82",
-      "resolved": "https://registry.npmjs.org/header-generator/-/header-generator-2.1.82.tgz",
-      "integrity": "sha512-4NjPB0+bAKjPoponSmTOkK58IEF2W22sOJA5O48k/MxbCZgOm+jrU4WVR53Z2I6xFgIPkVrQmKtt1LAbWtfqXw==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "browserslist": "^4.21.1",
-        "generative-bayesian-network": "^2.1.82",
-        "ow": "^0.28.1",
-        "tslib": "^2.4.0"
-      },
-      "engines": {
-        "node": ">=16.0.0"
-      }
-    },
-    "node_modules/http-errors": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz",
-      "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==",
-      "license": "MIT",
-      "dependencies": {
-        "depd": "~2.0.0",
-        "inherits": "~2.0.4",
-        "setprototypeof": "~1.2.0",
-        "statuses": "~2.0.2",
-        "toidentifier": "~1.0.1"
-      },
-      "engines": {
-        "node": ">= 0.8"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/express"
-      }
-    },
-    "node_modules/iconv-lite": {
-      "version": "0.4.24",
-      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
-      "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
-      "license": "MIT",
-      "dependencies": {
-        "safer-buffer": ">= 2.1.2 < 3"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/ieee754": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
-      "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "license": "BSD-3-Clause"
-    },
-    "node_modules/impit": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit/-/impit-0.7.6.tgz",
-      "integrity": "sha512-AkS6Gv63+E6GMvBrcRhMmOREKpq5oJ0J5m3xwfkHiEs97UIsbpEqFmW3sFw/sdyOTDGRF5q4EjaLxtb922Ta8g==",
-      "license": "Apache-2.0",
-      "engines": {
-        "node": ">= 20"
-      },
-      "optionalDependencies": {
-        "impit-darwin-arm64": "0.7.6",
-        "impit-darwin-x64": "0.7.6",
-        "impit-linux-arm64-gnu": "0.7.6",
-        "impit-linux-arm64-musl": "0.7.6",
-        "impit-linux-x64-gnu": "0.7.6",
-        "impit-linux-x64-musl": "0.7.6",
-        "impit-win32-arm64-msvc": "0.7.6",
-        "impit-win32-x64-msvc": "0.7.6"
-      }
-    },
-    "node_modules/impit-darwin-arm64": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-darwin-arm64/-/impit-darwin-arm64-0.7.6.tgz",
-      "integrity": "sha512-M7NQXkttyzqilWfzVkNCp7hApT69m0etyJkVpHze4bR5z1kJnHhdsb8BSdDv2dzvZL4u1JyqZNxq+qoMn84eUw==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/impit-darwin-x64": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-darwin-x64/-/impit-darwin-x64-0.7.6.tgz",
-      "integrity": "sha512-kikTesWirAwJp9JPxzGLoGVc+heBlEabWS5AhTkQedACU153vmuL90OBQikVr3ul2N0LPImvnuB+51wV0zDE6g==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/impit-linux-arm64-gnu": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-linux-arm64-gnu/-/impit-linux-arm64-gnu-0.7.6.tgz",
-      "integrity": "sha512-H6GHjVr/0lG9VEJr6IHF8YLq+YkSIOF4k7Dfue2ygzUAj1+jZ5ZwnouhG/XrZHYW6EWsZmEAjjRfWE56Q0wDRQ==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/impit-linux-arm64-musl": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-linux-arm64-musl/-/impit-linux-arm64-musl-0.7.6.tgz",
-      "integrity": "sha512-1sCB/UBVXLZTpGJsXRdNNSvhN9xmmQcYLMWAAB4Itb7w684RHX1pLoCb6ichv7bfAf6tgaupcFIFZNBp3ghmQA==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/impit-linux-x64-gnu": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-linux-x64-gnu/-/impit-linux-x64-gnu-0.7.6.tgz",
-      "integrity": "sha512-yYhlRnZ4fhKt8kuGe0JK2WSHc8TkR6BEH0wn+guevmu8EOn9Xu43OuRvkeOyVAkRqvFnlZtMyySUo/GuSLz9Gw==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/impit-linux-x64-musl": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-linux-x64-musl/-/impit-linux-x64-musl-0.7.6.tgz",
-      "integrity": "sha512-sdGWyu+PCLmaOXy7Mzo4WP61ZLl5qpZ1L+VeXW+Ycazgu0e7ox0NZLdiLRunIrEzD+h0S+e4CyzNwaiP3yIolg==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/impit-win32-arm64-msvc": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-win32-arm64-msvc/-/impit-win32-arm64-msvc-0.7.6.tgz",
-      "integrity": "sha512-sM5deBqo0EuXg5GACBUMKEua9jIau/i34bwNlfrf/Amnw1n0GB4/RkuUh+sKiUcbNAntrRq+YhCq8qDP8IW19w==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/impit-win32-x64-msvc": {
-      "version": "0.7.6",
-      "resolved": "https://registry.npmjs.org/impit-win32-x64-msvc/-/impit-win32-x64-msvc-0.7.6.tgz",
-      "integrity": "sha512-ry63ADGLCB/PU/vNB1VioRt2V+klDJ34frJUXUZBEv1kA96HEAg9AxUk+604o+UHS3ttGH2rkLmrbwHOdAct5Q==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">= 10"
-      }
-    },
-    "node_modules/inflight": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
-      "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
-      "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
-      "license": "ISC",
-      "dependencies": {
-        "once": "^1.3.0",
-        "wrappy": "1"
-      }
-    },
-    "node_modules/inherits": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
-      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
-      "license": "ISC"
-    },
-    "node_modules/ini": {
-      "version": "1.3.8",
-      "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
-      "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
-      "license": "ISC"
-    },
-    "node_modules/ipaddr.js": {
-      "version": "1.9.1",
-      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
-      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.10"
-      }
-    },
-    "node_modules/is-buffer": {
-      "version": "1.1.6",
-      "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
-      "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==",
-      "license": "MIT"
-    },
-    "node_modules/is-extendable": {
-      "version": "0.1.1",
-      "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
-      "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/is-obj": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/is-obj/-/is-obj-2.0.0.tgz",
-      "integrity": "sha512-drqDG3cbczxxEJRoOXcOjtdp1J/lyp1mNn0xaznRs8+muBhgQcrnbspox5X5fOw0HnMnbfDzvnEMEtqDEJEo8w==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/is-plain-object": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz",
-      "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==",
-      "license": "MIT",
-      "dependencies": {
-        "isobject": "^3.0.1"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/is-standalone-pwa": {
-      "version": "0.1.1",
-      "resolved": "https://registry.npmjs.org/is-standalone-pwa/-/is-standalone-pwa-0.1.1.tgz",
-      "integrity": "sha512-9Cbovsa52vNQCjdXOzeQq5CnCbAcRk05aU62K20WO372NrTv0NxibLFCK6lQ4/iZEFdEA3p3t2VNOn8AJ53F5g==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/faisalman"
-        },
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/ua-parser-js"
-        },
-        {
-          "type": "paypal",
-          "url": "https://paypal.me/faisalman"
-        }
-      ],
-      "license": "MIT"
-    },
-    "node_modules/isobject": {
-      "version": "3.0.1",
-      "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
-      "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/jsonfile": {
-      "version": "6.2.0",
-      "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz",
-      "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==",
-      "license": "MIT",
-      "dependencies": {
-        "universalify": "^2.0.0"
-      },
-      "optionalDependencies": {
-        "graceful-fs": "^4.1.6"
-      }
-    },
-    "node_modules/kind-of": {
-      "version": "3.2.2",
-      "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz",
-      "integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==",
-      "license": "MIT",
-      "dependencies": {
-        "is-buffer": "^1.1.5"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/language-subtag-registry": {
-      "version": "0.3.23",
-      "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.23.tgz",
-      "integrity": "sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ==",
-      "license": "CC0-1.0"
-    },
-    "node_modules/language-tags": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-2.1.0.tgz",
-      "integrity": "sha512-D4CgpyCt+61f6z2jHjJS1OmZPviAWM57iJ9OKdFFWSNgS7Udj9QVWqyGs/cveVNF57XpZmhSvMdVIV5mjLA7Vg==",
-      "license": "MIT",
-      "dependencies": {
-        "language-subtag-registry": "^0.3.20"
-      },
-      "engines": {
-        "node": ">=22"
-      }
-    },
-    "node_modules/lazy-cache": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz",
-      "integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/lodash.isequal": {
-      "version": "4.5.0",
-      "resolved": "https://registry.npmjs.org/lodash.isequal/-/lodash.isequal-4.5.0.tgz",
-      "integrity": "sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==",
-      "deprecated": "This package is deprecated. Use require('node:util').isDeepStrictEqual instead.",
-      "license": "MIT"
-    },
-    "node_modules/math-intrinsics": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
-      "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4"
-      }
-    },
-    "node_modules/maxmind": {
-      "version": "5.0.6",
-      "resolved": "https://registry.npmjs.org/maxmind/-/maxmind-5.0.6.tgz",
-      "integrity": "sha512-5bvd/u+kIaTqaGM+xkXjatzQw1dQfSmlLggr2W1EKMyMxSgx2woZyusLpNpZ4DdPmL+1bbJWeo4LXsi6bC0Iew==",
-      "license": "MIT",
-      "dependencies": {
-        "mmdb-lib": "3.0.2",
-        "tiny-lru": "13.0.0"
-      },
-      "engines": {
-        "node": ">=12",
-        "npm": ">=6"
-      }
-    },
-    "node_modules/media-typer": {
-      "version": "0.3.0",
-      "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
-      "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/merge-deep": {
-      "version": "3.0.3",
-      "resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz",
-      "integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==",
-      "license": "MIT",
-      "dependencies": {
-        "arr-union": "^3.1.0",
-        "clone-deep": "^0.2.4",
-        "kind-of": "^3.0.2"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/merge-descriptors": {
-      "version": "1.0.3",
-      "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz",
-      "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==",
-      "license": "MIT",
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/methods": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
-      "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/mime": {
-      "version": "1.6.0",
-      "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
-      "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==",
-      "license": "MIT",
-      "bin": {
-        "mime": "cli.js"
-      },
-      "engines": {
-        "node": ">=4"
-      }
-    },
-    "node_modules/mime-db": {
-      "version": "1.52.0",
-      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
-      "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/mime-types": {
-      "version": "2.1.35",
-      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
-      "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
-      "license": "MIT",
-      "dependencies": {
-        "mime-db": "1.52.0"
-      },
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/mimic-response": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
-      "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/minimatch": {
-      "version": "10.2.5",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
-      "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
-      "license": "BlueOak-1.0.0",
-      "dependencies": {
-        "brace-expansion": "^5.0.5"
-      },
-      "engines": {
-        "node": "18 || 20 || >=22"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
-    "node_modules/minimist": {
-      "version": "1.2.8",
-      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
-      "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
-      "license": "MIT",
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/minipass": {
-      "version": "7.1.3",
-      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz",
-      "integrity": "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==",
-      "license": "BlueOak-1.0.0",
-      "engines": {
-        "node": ">=16 || 14 >=14.17"
-      }
-    },
-    "node_modules/mixin-object": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz",
-      "integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==",
-      "license": "MIT",
-      "dependencies": {
-        "for-in": "^0.1.3",
-        "is-extendable": "^0.1.1"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/mixin-object/node_modules/for-in": {
-      "version": "0.1.8",
-      "resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz",
-      "integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/mkdirp-classic": {
-      "version": "0.5.3",
-      "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
-      "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==",
-      "license": "MIT"
-    },
-    "node_modules/mmdb-lib": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/mmdb-lib/-/mmdb-lib-3.0.2.tgz",
-      "integrity": "sha512-7e87vk0DdWT647wjcfEtWeMtjm+zVGqNohN/aeIymbUfjHQ2T4Sx5kM+1irVDBSloNC3CkGKxswdMoo8yhqTDg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=10",
-        "npm": ">=6"
-      }
-    },
-    "node_modules/ms": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
-      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "license": "MIT"
-    },
-    "node_modules/napi-build-utils": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz",
-      "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==",
-      "license": "MIT"
-    },
-    "node_modules/negotiator": {
-      "version": "0.6.3",
-      "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
-      "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/node-abi": {
-      "version": "3.89.0",
-      "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz",
-      "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==",
-      "license": "MIT",
-      "dependencies": {
-        "semver": "^7.3.5"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/node-releases": {
-      "version": "2.0.37",
-      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.37.tgz",
-      "integrity": "sha512-1h5gKZCF+pO/o3Iqt5Jp7wc9rH3eJJ0+nh/CIoiRwjRxde/hAHyLPXYN4V3CqKAbiZPSeJFSWHmJsbkicta0Eg==",
-      "license": "MIT"
-    },
-    "node_modules/object-inspect": {
-      "version": "1.13.4",
-      "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz",
-      "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/on-finished": {
-      "version": "2.4.1",
-      "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz",
-      "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==",
-      "license": "MIT",
-      "dependencies": {
-        "ee-first": "1.1.1"
-      },
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/once": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
-      "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
-      "license": "ISC",
-      "dependencies": {
-        "wrappy": "1"
-      }
-    },
-    "node_modules/ow": {
-      "version": "0.28.2",
-      "resolved": "https://registry.npmjs.org/ow/-/ow-0.28.2.tgz",
-      "integrity": "sha512-dD4UpyBh/9m4X2NVjA+73/ZPBRF+uF4zIMFvvQsabMiEK8x41L3rQ8EENOi35kyyoaJwNxEeJcP6Fj1H4U409Q==",
-      "license": "MIT",
-      "dependencies": {
-        "@sindresorhus/is": "^4.2.0",
-        "callsites": "^3.1.0",
-        "dot-prop": "^6.0.1",
-        "lodash.isequal": "^4.5.0",
-        "vali-date": "^1.0.0"
-      },
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/parseurl": {
-      "version": "1.3.3",
-      "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
-      "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/path-is-absolute": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
-      "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/path-scurry": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz",
-      "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==",
-      "license": "BlueOak-1.0.0",
-      "dependencies": {
-        "lru-cache": "^11.0.0",
-        "minipass": "^7.1.2"
-      },
-      "engines": {
-        "node": "18 || 20 || >=22"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
-    "node_modules/path-scurry/node_modules/lru-cache": {
-      "version": "11.3.3",
-      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.3.3.tgz",
-      "integrity": "sha512-JvNw9Y81y33E+BEYPr0U7omo+U9AySnsMsEiXgwT6yqd31VQWTLNQqmT4ou5eqPFUrTfIDFta2wKhB1hyohtAQ==",
-      "license": "BlueOak-1.0.0",
-      "engines": {
-        "node": "20 || >=22"
-      }
-    },
-    "node_modules/path-to-regexp": {
-      "version": "0.1.13",
-      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.13.tgz",
-      "integrity": "sha512-A/AGNMFN3c8bOlvV9RreMdrv7jsmF9XIfDeCd87+I8RNg6s78BhJxMu69NEMHBSJFxKidViTEdruRwEk/WIKqA==",
-      "license": "MIT"
-    },
-    "node_modules/picocolors": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
-      "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==",
-      "license": "ISC"
-    },
-    "node_modules/playwright": {
-      "version": "1.59.1",
-      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
-      "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "playwright-core": "1.59.1"
-      },
-      "bin": {
-        "playwright": "cli.js"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "optionalDependencies": {
-        "fsevents": "2.3.2"
-      }
-    },
-    "node_modules/playwright-core": {
-      "version": "1.59.1",
-      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
-      "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
-      "license": "Apache-2.0",
-      "bin": {
-        "playwright-core": "cli.js"
-      },
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/playwright-extra": {
-      "version": "4.3.6",
-      "resolved": "https://registry.npmjs.org/playwright-extra/-/playwright-extra-4.3.6.tgz",
-      "integrity": "sha512-q2rVtcE8V8K3vPVF1zny4pvwZveHLH8KBuVU2MoE3Jw4OKVoBWsHI9CH9zPydovHHOCDxjGN2Vg+2m644q3ijA==",
-      "license": "MIT",
-      "dependencies": {
-        "debug": "^4.3.4"
-      },
-      "engines": {
-        "node": ">=12"
-      },
-      "peerDependencies": {
-        "playwright": "*",
-        "playwright-core": "*"
-      },
-      "peerDependenciesMeta": {
-        "playwright": {
-          "optional": true
-        },
-        "playwright-core": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/playwright-extra/node_modules/debug": {
-      "version": "4.4.3",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
-      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
-      "license": "MIT",
-      "dependencies": {
-        "ms": "^2.1.3"
-      },
-      "engines": {
-        "node": ">=6.0"
-      },
-      "peerDependenciesMeta": {
-        "supports-color": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/playwright-extra/node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "license": "MIT"
-    },
-    "node_modules/prebuild-install": {
-      "version": "7.1.3",
-      "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
-      "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==",
-      "deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.",
-      "license": "MIT",
-      "dependencies": {
-        "detect-libc": "^2.0.0",
-        "expand-template": "^2.0.3",
-        "github-from-package": "0.0.0",
-        "minimist": "^1.2.3",
-        "mkdirp-classic": "^0.5.3",
-        "napi-build-utils": "^2.0.0",
-        "node-abi": "^3.3.0",
-        "pump": "^3.0.0",
-        "rc": "^1.2.7",
-        "simple-get": "^4.0.0",
-        "tar-fs": "^2.0.0",
-        "tunnel-agent": "^0.6.0"
-      },
-      "bin": {
-        "prebuild-install": "bin.js"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/progress": {
-      "version": "2.0.3",
-      "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
-      "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.4.0"
-      }
-    },
-    "node_modules/prom-client": {
-      "version": "15.1.3",
-      "resolved": "https://registry.npmjs.org/prom-client/-/prom-client-15.1.3.tgz",
-      "integrity": "sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "@opentelemetry/api": "^1.4.0",
-        "tdigest": "^0.1.1"
-      },
-      "engines": {
-        "node": "^16 || ^18 || >=20"
-      }
-    },
-    "node_modules/proxy-addr": {
-      "version": "2.0.7",
-      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
-      "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
-      "license": "MIT",
-      "dependencies": {
-        "forwarded": "0.2.0",
-        "ipaddr.js": "1.9.1"
-      },
-      "engines": {
-        "node": ">= 0.10"
-      }
-    },
-    "node_modules/pump": {
-      "version": "3.0.4",
-      "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz",
-      "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==",
-      "license": "MIT",
-      "dependencies": {
-        "end-of-stream": "^1.1.0",
-        "once": "^1.3.1"
-      }
-    },
-    "node_modules/puppeteer-extra-plugin": {
-      "version": "3.2.3",
-      "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz",
-      "integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==",
-      "license": "MIT",
-      "dependencies": {
-        "@types/debug": "^4.1.0",
-        "debug": "^4.1.1",
-        "merge-deep": "^3.0.1"
-      },
-      "engines": {
-        "node": ">=9.11.2"
-      },
-      "peerDependencies": {
-        "playwright-extra": "*",
-        "puppeteer-extra": "*"
-      },
-      "peerDependenciesMeta": {
-        "playwright-extra": {
-          "optional": true
-        },
-        "puppeteer-extra": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-stealth": {
-      "version": "2.11.2",
-      "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.2.tgz",
-      "integrity": "sha512-bUemM5XmTj9i2ZerBzsk2AN5is0wHMNE6K0hXBzBXOzP5m5G3Wl0RHhiqKeHToe/uIH8AoZiGhc1tCkLZQPKTQ==",
-      "license": "MIT",
-      "dependencies": {
-        "debug": "^4.1.1",
-        "puppeteer-extra-plugin": "^3.2.3",
-        "puppeteer-extra-plugin-user-preferences": "^2.4.1"
-      },
-      "engines": {
-        "node": ">=8"
-      },
-      "peerDependencies": {
-        "playwright-extra": "*",
-        "puppeteer-extra": "*"
-      },
-      "peerDependenciesMeta": {
-        "playwright-extra": {
-          "optional": true
-        },
-        "puppeteer-extra": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-stealth/node_modules/debug": {
-      "version": "4.4.3",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
-      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
-      "license": "MIT",
-      "dependencies": {
-        "ms": "^2.1.3"
-      },
-      "engines": {
-        "node": ">=6.0"
-      },
-      "peerDependenciesMeta": {
-        "supports-color": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-stealth/node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "license": "MIT"
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir": {
-      "version": "2.4.1",
-      "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz",
-      "integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==",
-      "license": "MIT",
-      "dependencies": {
-        "debug": "^4.1.1",
-        "fs-extra": "^10.0.0",
-        "puppeteer-extra-plugin": "^3.2.3",
-        "rimraf": "^3.0.2"
-      },
-      "engines": {
-        "node": ">=8"
-      },
-      "peerDependencies": {
-        "playwright-extra": "*",
-        "puppeteer-extra": "*"
-      },
-      "peerDependenciesMeta": {
-        "playwright-extra": {
-          "optional": true
-        },
-        "puppeteer-extra": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/balanced-match": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
-      "license": "MIT"
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/brace-expansion": {
-      "version": "1.1.14",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz",
-      "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==",
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/debug": {
-      "version": "4.4.3",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
-      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
-      "license": "MIT",
-      "dependencies": {
-        "ms": "^2.1.3"
-      },
-      "engines": {
-        "node": ">=6.0"
-      },
-      "peerDependenciesMeta": {
-        "supports-color": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/glob": {
-      "version": "7.2.3",
-      "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
-      "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
-      "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me",
-      "license": "ISC",
-      "dependencies": {
-        "fs.realpath": "^1.0.0",
-        "inflight": "^1.0.4",
-        "inherits": "2",
-        "minimatch": "^3.1.1",
-        "once": "^1.3.0",
-        "path-is-absolute": "^1.0.0"
-      },
-      "engines": {
-        "node": "*"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/minimatch": {
-      "version": "3.1.5",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
-      "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
-      "license": "ISC",
-      "dependencies": {
-        "brace-expansion": "^1.1.7"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "license": "MIT"
-    },
-    "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/rimraf": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
-      "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==",
-      "deprecated": "Rimraf versions prior to v4 are no longer supported",
-      "license": "ISC",
-      "dependencies": {
-        "glob": "^7.1.3"
-      },
-      "bin": {
-        "rimraf": "bin.js"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-preferences": {
-      "version": "2.4.1",
-      "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz",
-      "integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==",
-      "license": "MIT",
-      "dependencies": {
-        "debug": "^4.1.1",
-        "deepmerge": "^4.2.2",
-        "puppeteer-extra-plugin": "^3.2.3",
-        "puppeteer-extra-plugin-user-data-dir": "^2.4.1"
-      },
-      "engines": {
-        "node": ">=8"
-      },
-      "peerDependencies": {
-        "playwright-extra": "*",
-        "puppeteer-extra": "*"
-      },
-      "peerDependenciesMeta": {
-        "playwright-extra": {
-          "optional": true
-        },
-        "puppeteer-extra": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-preferences/node_modules/debug": {
-      "version": "4.4.3",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
-      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
-      "license": "MIT",
-      "dependencies": {
-        "ms": "^2.1.3"
-      },
-      "engines": {
-        "node": ">=6.0"
-      },
-      "peerDependenciesMeta": {
-        "supports-color": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin-user-preferences/node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "license": "MIT"
-    },
-    "node_modules/puppeteer-extra-plugin/node_modules/debug": {
-      "version": "4.4.3",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
-      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
-      "license": "MIT",
-      "dependencies": {
-        "ms": "^2.1.3"
-      },
-      "engines": {
-        "node": ">=6.0"
-      },
-      "peerDependenciesMeta": {
-        "supports-color": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/puppeteer-extra-plugin/node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "license": "MIT"
-    },
-    "node_modules/qs": {
-      "version": "6.14.2",
-      "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.2.tgz",
-      "integrity": "sha512-V/yCWTTF7VJ9hIh18Ugr2zhJMP01MY7c5kh4J870L7imm6/DIzBsNLTXzMwUA3yZ5b/KBqLx8Kp3uRvd7xSe3Q==",
-      "license": "BSD-3-Clause",
-      "dependencies": {
-        "side-channel": "^1.1.0"
-      },
-      "engines": {
-        "node": ">=0.6"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/range-parser": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
-      "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/raw-body": {
-      "version": "2.5.3",
-      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.3.tgz",
-      "integrity": "sha512-s4VSOf6yN0rvbRZGxs8Om5CWj6seneMwK3oDb4lWDH0UPhWcxwOWw5+qk24bxq87szX1ydrwylIOp2uG1ojUpA==",
-      "license": "MIT",
-      "dependencies": {
-        "bytes": "~3.1.2",
-        "http-errors": "~2.0.1",
-        "iconv-lite": "~0.4.24",
-        "unpipe": "~1.0.0"
-      },
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/rc": {
-      "version": "1.2.8",
-      "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
-      "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
-      "license": "(BSD-2-Clause OR MIT OR Apache-2.0)",
-      "dependencies": {
-        "deep-extend": "^0.6.0",
-        "ini": "~1.3.0",
-        "minimist": "^1.2.0",
-        "strip-json-comments": "~2.0.1"
-      },
-      "bin": {
-        "rc": "cli.js"
-      }
-    },
-    "node_modules/readable-stream": {
-      "version": "3.6.2",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
-      "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
-      "license": "MIT",
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "string_decoder": "^1.1.1",
-        "util-deprecate": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      }
-    },
-    "node_modules/safe-buffer": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "license": "MIT"
-    },
-    "node_modules/safer-buffer": {
-      "version": "2.1.2",
-      "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
-      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
-      "license": "MIT"
-    },
-    "node_modules/sax": {
-      "version": "1.6.0",
-      "resolved": "https://registry.npmjs.org/sax/-/sax-1.6.0.tgz",
-      "integrity": "sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA==",
-      "license": "BlueOak-1.0.0",
-      "engines": {
-        "node": ">=11.0.0"
-      }
-    },
-    "node_modules/semver": {
-      "version": "7.7.4",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
-      "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
-      "license": "ISC",
-      "bin": {
-        "semver": "bin/semver.js"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/send": {
-      "version": "0.19.2",
-      "resolved": "https://registry.npmjs.org/send/-/send-0.19.2.tgz",
-      "integrity": "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==",
-      "license": "MIT",
-      "dependencies": {
-        "debug": "2.6.9",
-        "depd": "2.0.0",
-        "destroy": "1.2.0",
-        "encodeurl": "~2.0.0",
-        "escape-html": "~1.0.3",
-        "etag": "~1.8.1",
-        "fresh": "~0.5.2",
-        "http-errors": "~2.0.1",
-        "mime": "1.6.0",
-        "ms": "2.1.3",
-        "on-finished": "~2.4.1",
-        "range-parser": "~1.2.1",
-        "statuses": "~2.0.2"
-      },
-      "engines": {
-        "node": ">= 0.8.0"
-      }
-    },
-    "node_modules/send/node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "license": "MIT"
-    },
-    "node_modules/serve-static": {
-      "version": "1.16.3",
-      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.3.tgz",
-      "integrity": "sha512-x0RTqQel6g5SY7Lg6ZreMmsOzncHFU7nhnRWkKgWuMTu5NN0DR5oruckMqRvacAN9d5w6ARnRBXl9xhDCgfMeA==",
-      "license": "MIT",
-      "dependencies": {
-        "encodeurl": "~2.0.0",
-        "escape-html": "~1.0.3",
-        "parseurl": "~1.3.3",
-        "send": "~0.19.1"
-      },
-      "engines": {
-        "node": ">= 0.8.0"
-      }
-    },
-    "node_modules/setprototypeof": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
-      "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
-      "license": "ISC"
-    },
-    "node_modules/shallow-clone": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz",
-      "integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==",
-      "license": "MIT",
-      "dependencies": {
-        "is-extendable": "^0.1.1",
-        "kind-of": "^2.0.1",
-        "lazy-cache": "^0.2.3",
-        "mixin-object": "^2.0.1"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/shallow-clone/node_modules/kind-of": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz",
-      "integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==",
-      "license": "MIT",
-      "dependencies": {
-        "is-buffer": "^1.0.2"
-      },
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/shallow-clone/node_modules/lazy-cache": {
-      "version": "0.2.7",
-      "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz",
-      "integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/side-channel": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz",
-      "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==",
-      "license": "MIT",
-      "dependencies": {
-        "es-errors": "^1.3.0",
-        "object-inspect": "^1.13.3",
-        "side-channel-list": "^1.0.0",
-        "side-channel-map": "^1.0.1",
-        "side-channel-weakmap": "^1.0.2"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/side-channel-list": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.1.tgz",
-      "integrity": "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==",
-      "license": "MIT",
-      "dependencies": {
-        "es-errors": "^1.3.0",
-        "object-inspect": "^1.13.4"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/side-channel-map": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz",
-      "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==",
-      "license": "MIT",
-      "dependencies": {
-        "call-bound": "^1.0.2",
-        "es-errors": "^1.3.0",
-        "get-intrinsic": "^1.2.5",
-        "object-inspect": "^1.13.3"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/side-channel-weakmap": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz",
-      "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==",
-      "license": "MIT",
-      "dependencies": {
-        "call-bound": "^1.0.2",
-        "es-errors": "^1.3.0",
-        "get-intrinsic": "^1.2.5",
-        "object-inspect": "^1.13.3",
-        "side-channel-map": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 0.4"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/ljharb"
-      }
-    },
-    "node_modules/simple-concat": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
-      "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "license": "MIT"
-    },
-    "node_modules/simple-get": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
-      "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "license": "MIT",
-      "dependencies": {
-        "decompress-response": "^6.0.0",
-        "once": "^1.3.1",
-        "simple-concat": "^1.0.0"
-      }
-    },
-    "node_modules/statuses": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz",
-      "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/string_decoder": {
-      "version": "1.3.0",
-      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
-      "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
-      "license": "MIT",
-      "dependencies": {
-        "safe-buffer": "~5.2.0"
-      }
-    },
-    "node_modules/strip-json-comments": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
-      "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/tar-fs": {
-      "version": "2.1.4",
-      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz",
-      "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==",
-      "license": "MIT",
-      "dependencies": {
-        "chownr": "^1.1.1",
-        "mkdirp-classic": "^0.5.2",
-        "pump": "^3.0.0",
-        "tar-stream": "^2.1.4"
-      }
-    },
-    "node_modules/tar-stream": {
-      "version": "2.2.0",
-      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
-      "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
-      "license": "MIT",
-      "dependencies": {
-        "bl": "^4.0.3",
-        "end-of-stream": "^1.4.1",
-        "fs-constants": "^1.0.0",
-        "inherits": "^2.0.3",
-        "readable-stream": "^3.1.1"
-      },
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/tdigest": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.2.tgz",
-      "integrity": "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==",
-      "license": "MIT",
-      "dependencies": {
-        "bintrees": "1.0.2"
-      }
-    },
-    "node_modules/tiny-lru": {
-      "version": "13.0.0",
-      "resolved": "https://registry.npmjs.org/tiny-lru/-/tiny-lru-13.0.0.tgz",
-      "integrity": "sha512-xDHxKKS1FdF0Tv2P+QT7IeSEg74K/8cEDzbv3Tv6UyHHUgBOjOiQiBp818MGj66dhurQus/IBcoAbwIKtSGc6Q==",
-      "license": "BSD-3-Clause",
-      "engines": {
-        "node": ">=14"
-      }
-    },
-    "node_modules/toidentifier": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
-      "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.6"
-      }
-    },
-    "node_modules/tslib": {
-      "version": "2.8.1",
-      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
-      "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
-      "license": "0BSD"
-    },
-    "node_modules/tunnel-agent": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
-      "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "safe-buffer": "^5.0.1"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
-    "node_modules/type-is": {
-      "version": "1.6.18",
-      "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
-      "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
-      "license": "MIT",
-      "dependencies": {
-        "media-typer": "0.3.0",
-        "mime-types": "~2.1.24"
-      },
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/ua-is-frozen": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/ua-is-frozen/-/ua-is-frozen-0.1.2.tgz",
-      "integrity": "sha512-RwKDW2p3iyWn4UbaxpP2+VxwqXh0jpvdxsYpZ5j/MLLiQOfbsV5shpgQiw93+KMYQPcteeMQ289MaAFzs3G9pw==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/faisalman"
-        },
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/ua-parser-js"
-        },
-        {
-          "type": "paypal",
-          "url": "https://paypal.me/faisalman"
-        }
-      ],
-      "license": "MIT"
-    },
-    "node_modules/ua-parser-js": {
-      "version": "2.0.9",
-      "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-2.0.9.tgz",
-      "integrity": "sha512-OsqGhxyo/wGdLSXMSJxuMGN6H4gDnKz6Fb3IBm4bxZFMnyy0sdf6MN96Ie8tC6z/btdO+Bsy8guxlvLdwT076w==",
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/ua-parser-js"
-        },
-        {
-          "type": "paypal",
-          "url": "https://paypal.me/faisalman"
-        },
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/faisalman"
-        }
-      ],
-      "license": "AGPL-3.0-or-later",
-      "dependencies": {
-        "detect-europe-js": "^0.1.2",
-        "is-standalone-pwa": "^0.1.1",
-        "ua-is-frozen": "^0.1.2"
-      },
-      "bin": {
-        "ua-parser-js": "script/cli.js"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
-    "node_modules/universalify": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
-      "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 10.0.0"
-      }
-    },
-    "node_modules/unpipe": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
-      "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/update-browserslist-db": {
-      "version": "1.2.3",
-      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz",
-      "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==",
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/browserslist"
-        },
-        {
-          "type": "tidelift",
-          "url": "https://tidelift.com/funding/github/npm/browserslist"
-        },
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/ai"
-        }
-      ],
-      "license": "MIT",
-      "dependencies": {
-        "escalade": "^3.2.0",
-        "picocolors": "^1.1.1"
-      },
-      "bin": {
-        "update-browserslist-db": "cli.js"
-      },
-      "peerDependencies": {
-        "browserslist": ">= 4.21.0"
-      }
-    },
-    "node_modules/util-deprecate": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
-      "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
-      "license": "MIT"
-    },
-    "node_modules/utils-merge": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
-      "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.4.0"
-      }
-    },
-    "node_modules/vali-date": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/vali-date/-/vali-date-1.0.0.tgz",
-      "integrity": "sha512-sgECfZthyaCKW10N0fm27cg8HYTFK5qMWgypqkXMQ4Wbl/zZKx7xZICgcoxIIE+WFAP/MBL2EFwC/YvLxw3Zeg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
-    "node_modules/vary": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
-      "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 0.8"
-      }
-    },
-    "node_modules/wrappy": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
-      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
-      "license": "ISC"
-    },
-    "node_modules/xml2js": {
-      "version": "0.6.2",
-      "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.6.2.tgz",
-      "integrity": "sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==",
-      "license": "MIT",
-      "dependencies": {
-        "sax": ">=0.6.0",
-        "xmlbuilder": "~11.0.0"
-      },
-      "engines": {
-        "node": ">=4.0.0"
-      }
-    },
-    "node_modules/xmlbuilder": {
-      "version": "11.0.1",
-      "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz",
-      "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=4.0"
-      }
     }
   }
 }
diff --git a/package.json b/package.json
index 8fcf5cea696..7500796acd6 100644
--- a/package.json
+++ b/package.json
@@ -16,7 +16,6 @@
   },
   "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
   "dependencies": {
-    "@askjo/camofox-browser": "^1.5.2",
     "agent-browser": "^0.26.0"
   },
   "overrides": {
diff --git a/tests/test_package_json_lazy_deps.py b/tests/test_package_json_lazy_deps.py
new file mode 100644
index 00000000000..0e2456dba2a
--- /dev/null
+++ b/tests/test_package_json_lazy_deps.py
@@ -0,0 +1,85 @@
+"""Invariants for what is eager vs lazy in the root ``package.json``.
+
+The root ``package.json`` is installed by ``hermes update`` on every user,
+including users who never opted into a given browser backend. Anything
+listed in ``dependencies`` therefore runs its npm postinstall script for
+everyone — including binary-fetching backends, on every update.
+
+The contract:
+
+* ``agent-browser`` IS eager. It is the default Chromium-driving backend
+  used whenever the agent makes a browser call without a cloud provider
+  configured, so it must already be installed before any session starts.
+  Its postinstall is also small.
+
+* ``@askjo/camofox-browser`` is NOT eager. It is an explicit opt-in
+  alternative browser backend, selected by the user via
+  ``hermes tools`` → Browser Automation → Camofox, and only used at
+  runtime when ``CAMOFOX_URL`` is set. Its postinstall fetches a ~300MB
+  Firefox-fork binary, which silently blocked ``hermes update`` for
+  multi-minute stretches on slow / network-restricted connections
+  (notably users in China running through a VPN). The package is
+  installed on demand by ``tools_config.py`` ``post_setup_key ==
+  "camofox"`` when the user actually selects Camofox.
+
+If a future PR re-adds Camofox (or any other binary-postinstall package)
+to root ``dependencies``, this test fails — read the lazy-install
+guidance in the ``hermes-agent-dev`` skill before changing the
+expectations.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+
+
+def _root_package_json() -> dict:
+    with (REPO_ROOT / "package.json").open("r", encoding="utf-8") as fh:
+        return json.load(fh)
+
+
+def test_camofox_is_not_in_root_dependencies() -> None:
+    """Camofox must be opt-in, installed lazily by its post_setup handler."""
+    deps = _root_package_json().get("dependencies", {})
+    assert "@askjo/camofox-browser" not in deps, (
+        "Camofox is a ~300MB binary-postinstall backend that must stay "
+        "out of root package.json dependencies. It belongs in the "
+        "Camofox post_setup handler in hermes_cli/tools_config.py so it "
+        "only installs when the user explicitly selects Camofox via "
+        "`hermes tools` → Browser Automation → Camofox."
+    )
+
+
+def test_agent_browser_stays_eager() -> None:
+    """agent-browser is the default backend; it must remain eager."""
+    deps = _root_package_json().get("dependencies", {})
+    assert "agent-browser" in deps, (
+        "agent-browser is the default browser-tool backend used by every "
+        "session that doesn't have a cloud browser provider configured. "
+        "It must stay in root package.json dependencies so it is present "
+        "after `hermes setup` / `hermes update` without an explicit "
+        "post_setup step."
+    )
+
+
+def test_root_lockfile_has_no_camofox_entries() -> None:
+    """Regenerated lockfiles should not contain Camofox tree entries."""
+    lock_path = REPO_ROOT / "package-lock.json"
+    if not lock_path.exists():
+        # Some CI matrix shards skip lockfile materialization.
+        return
+    text = lock_path.read_text(encoding="utf-8")
+    assert "@askjo/camofox-browser" not in text, (
+        "package-lock.json still references @askjo/camofox-browser. "
+        "Regenerate the lockfile after removing the dep: "
+        "`rm package-lock.json && npm install --package-lock-only "
+        "--ignore-scripts --no-fund --no-audit`."
+    )
+    assert "camoufox-js" not in text, (
+        "package-lock.json still references camoufox-js (transitive of "
+        "@askjo/camofox-browser). Regenerate the lockfile."
+    )

From c844d15c3d27991a35bbc4ec56558d85122412c9 Mon Sep 17 00:00:00 2001
From: briandevans <252620095+briandevans@users.noreply.github.com>
Date: Sat, 2 May 2026 08:15:11 -0700
Subject: [PATCH 139/218] fix(update): stream npm install output so postinstall
 progress is visible (#18840)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`hermes update` ran the repo-root and ui-tui npm installs with both
`--silent` and `subprocess.run(..., capture_output=True)`, which hides
all output from optional postinstall scripts.  The largest of those —
`@askjo/camofox-browser`'s `npx camoufox-js fetch` — downloads a
Firefox-fork browser binary that can take many minutes on slow
connections.  Because nothing was printed during that wait, the updater
appeared to hang at "Updating Node.js dependencies..." and users
Ctrl-C'd, sometimes leaving `node_modules` partially installed.

Drop `--silent` and pass `capture_output=False` for the repo-root and
ui-tui paths so npm streams its `info run …` postinstall lines straight
to the terminal.  Output is still mirrored to `~/.hermes/logs/update.log`
by the existing `_UpdateOutputStream` wrapper, so SSH-disconnect safety
is preserved.

The `web/` install path is untouched — its build step is fast and does
not run binary-fetching postinstalls.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hermes_cli/main.py                  | 11 ++++++++--
 tests/hermes_cli/test_cmd_update.py | 31 +++++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 41c4a23f932..a893ee85846 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -7226,17 +7226,24 @@ def _update_node_dependencies() -> None:
         if not (path / "package.json").exists():
             continue
 
+        # Stream npm output (no `--silent`, no `capture_output`) so any
+        # optional dependency postinstall scripts (e.g. `agent-browser`'s
+        # Chromium fetch on first install) print progress instead of
+        # appearing to hang silently for minutes (#18840).  The
+        # `_UpdateOutputStream` wrapper installed by the updater mirrors
+        # streamed output to ``~/.hermes/logs/update.log`` so nothing is lost.
         result = _run_npm_install_deterministic(
             npm,
             path,
-            extra_args=("--silent", "--no-fund", "--no-audit", "--progress=false"),
+            extra_args=("--no-fund", "--no-audit", "--progress=false"),
+            capture_output=False,
         )
         if result.returncode == 0:
             print(f"  ✓ {label}")
             continue
 
         print(f"  ⚠ npm install failed in {label}")
-        stderr = (result.stderr or "").strip()
+        stderr = (result.stderr or "").strip() if result.stderr else ""
         if stderr:
             print(f"    {stderr.splitlines()[-1]}")
 
diff --git a/tests/hermes_cli/test_cmd_update.py b/tests/hermes_cli/test_cmd_update.py
index f059e54ac05..2f4b836286b 100644
--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@@ -130,17 +130,22 @@ class TestCmdUpdateBranchFallback:
         #   1. repo root  — slash-command / TUI bridge deps
         #   2. ui-tui/    — Ink TUI deps
         #   3. web/       — install + "npm run build" for the web frontend
-        full_flags = [
+        #
+        # Repo-root and ui-tui installs intentionally omit `--silent` and run
+        # without `capture_output` so optional postinstall scripts (e.g.
+        # `@askjo/camofox-browser`'s browser-binary fetch) print progress —
+        # otherwise long downloads look like a hang (#18840).  The web/ install
+        # keeps `--silent` because its build step is short and noisy.
+        update_flags = [
             "/usr/bin/npm",
             "ci",
-            "--silent",
             "--no-fund",
             "--no-audit",
             "--progress=false",
         ]
         assert npm_calls[:2] == [
-            (full_flags, PROJECT_ROOT),
-            (full_flags, PROJECT_ROOT / "ui-tui"),
+            (update_flags, PROJECT_ROOT),
+            (update_flags, PROJECT_ROOT / "ui-tui"),
         ]
         if len(npm_calls) > 2:
             assert npm_calls[2:] == [
@@ -148,6 +153,24 @@ class TestCmdUpdateBranchFallback:
                 (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
             ]
 
+        # Regression for #18840: repo root + ui-tui installs must stream
+        # output (capture_output=False) so postinstall progress is visible
+        # to the user.
+        repo_and_tui_calls = [
+            call
+            for call in mock_run.call_args_list
+            if call.args
+            and call.args[0][0] == "/usr/bin/npm"
+            and call.args[0][1] == "ci"
+            and call.kwargs.get("cwd") in (PROJECT_ROOT, PROJECT_ROOT / "ui-tui")
+        ]
+        assert len(repo_and_tui_calls) == 2
+        for call in repo_and_tui_calls:
+            assert call.kwargs.get("capture_output") is False, (
+                "repo-root / ui-tui npm install must stream output "
+                "(no capture_output) so postinstall progress is visible"
+            )
+
     def test_update_non_interactive_runs_safe_config_migrations(self, mock_args, capsys):
         """Dashboard/web updates apply non-interactive migrations before restart."""
         with patch("shutil.which", return_value=None), patch(

From fc03c95da13105807cb3b3f42a311e4916b456ce Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 12:51:08 -0700
Subject: [PATCH 140/218] feat(cli): add /exit --delete flag to remove session
 on quit (#27101)

Port from google-gemini/gemini-cli#19332.

Users can now exit with '/exit --delete' (or '/quit --delete', '/exit -d')
to permanently remove the current session's SQLite history plus on-disk
transcripts (*.json / *.jsonl / request_dump_*) in one shot. Useful for
privacy-sensitive workflows and one-off interactions where leaving a
session recording behind is undesirable.

Implementation:
- New HermesCLI._delete_session_on_exit one-shot flag (defaults False).
- process_command() parses --delete / -d after /exit or /quit and arms
  the flag. Unknown args print a hint and keep the CLI running (prevents
  typos like '/exit -delete' from accidentally exiting).
- Shutdown path calls SessionDB.delete_session(session_id, sessions_dir=...)
  right after end_session() when the flag is set. That API already
  existed for 'hermes sessions delete' and handles both SQLite removal
  (orphaning child sessions so FK constraints hold) and on-disk file
  cleanup.
- /quit CommandDef now advertises '[--delete]' in args_hint so /help
  and CLI autocomplete surface it.

Tests: tests/cli/test_exit_delete_session.py (12 cases covering both
aliases, case insensitivity, whitespace, short form, unknown-arg
rejection, and registry metadata).

E2E-verified with isolated HERMES_HOME: session row deleted, all three
transcript/request-dump files removed, second delete_session call
correctly returns False.
---
 cli.py                                   |  28 ++++++
 hermes_cli/commands.py                   |   4 +-
 tests/cli/test_exit_delete_session.py    | 119 +++++++++++++++++++++++
 website/docs/reference/slash-commands.md |   2 +-
 4 files changed, 150 insertions(+), 3 deletions(-)
 create mode 100644 tests/cli/test_exit_delete_session.py

diff --git a/cli.py b/cli.py
index 241d41e9fcd..12f9ee98fb8 100644
--- a/cli.py
+++ b/cli.py
@@ -2824,6 +2824,11 @@ class HermesCLI:
         # turn (which would make Ctrl+C feel like it did nothing).
         self._last_turn_interrupted = False
         self._should_exit = False
+        # /exit --delete: when True, the current session's SQLite history and
+        # on-disk transcripts are deleted during shutdown. Set by
+        # process_command() when the user runs /exit --delete or /quit --delete.
+        # Ported from google-gemini/gemini-cli#19332.
+        self._delete_session_on_exit = False
         self._last_ctrl_c_time = 0
         self._clarify_state = None
         self._clarify_freetext = False
@@ -7653,6 +7658,16 @@ class HermesCLI:
         canonical = _cmd_def.name if _cmd_def else _base_word
         
         if canonical in {"quit", "exit"}:
+            # Parse --delete flag: /exit --delete also removes the current
+            # session's transcripts + SQLite history. Ported from
+            # google-gemini/gemini-cli#19332.
+            _rest = cmd_original.split(None, 1)
+            _args = (_rest[1] if len(_rest) > 1 else "").strip().lower()
+            if _args in ("--delete", "-d"):
+                self._delete_session_on_exit = True
+            elif _args:
+                _cprint(f"  {_DIM}✗ Unknown argument: {_escape(_args)}. Use /exit --delete to also remove session history.{_RST}")
+                return True
             return False
         elif canonical == "help":
             self.show_help()
@@ -13822,6 +13837,19 @@ class HermesCLI:
                     self._session_db.end_session(self.agent.session_id, "cli_close")
                 except (Exception, KeyboardInterrupt) as e:
                     logger.debug("Could not close session in DB: %s", e)
+                # /exit --delete: also remove the current session's transcripts
+                # and SQLite history. Ported from google-gemini/gemini-cli#19332.
+                if getattr(self, '_delete_session_on_exit', False):
+                    try:
+                        from hermes_constants import get_hermes_home as _ghh
+                        _sessions_dir = _ghh() / "sessions"
+                        _sid = self.agent.session_id
+                        if self._session_db.delete_session(_sid, sessions_dir=_sessions_dir):
+                            _cprint(f"  {_DIM}✓ Session {_escape(_sid)} deleted{_RST}")
+                        else:
+                            _cprint(f"  {_DIM}✗ Session {_escape(_sid)} not found for deletion{_RST}")
+                    except (Exception, KeyboardInterrupt) as e:
+                        logger.debug("Could not delete session on exit: %s", e)
             # Plugin hook: on_session_end — safety net for interrupted exits.
             # run_conversation() already fires this per-turn on normal completion,
             # so only fire here if the agent was mid-turn (_agent_running) when
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 83d86c4a3a9..07e5b5e5c4a 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -211,8 +211,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
     CommandDef("debug", "Upload debug report (system info + logs) and get shareable links", "Info"),
 
     # Exit
-    CommandDef("quit", "Exit the CLI", "Exit",
-               cli_only=True, aliases=("exit",)),
+    CommandDef("quit", "Exit the CLI (use --delete to also remove session history)", "Exit",
+               cli_only=True, aliases=("exit",), args_hint="[--delete]"),
 ]
 
 
diff --git a/tests/cli/test_exit_delete_session.py b/tests/cli/test_exit_delete_session.py
new file mode 100644
index 00000000000..dd4fe8d5aa1
--- /dev/null
+++ b/tests/cli/test_exit_delete_session.py
@@ -0,0 +1,119 @@
+"""Tests for `/exit --delete` and `/quit --delete` session deletion.
+
+Ports the behavior from google-gemini/gemini-cli#19332: running `/exit` or
+`/quit` with the `--delete` flag arms a one-shot `_delete_session_on_exit`
+flag that the CLI shutdown path uses to remove the current session from
+SQLite + on-disk transcripts before exit.
+"""
+
+from unittest.mock import MagicMock
+
+
+def _make_cli():
+    """Bare HermesCLI suitable for process_command() tests.
+
+    Uses ``__new__`` to skip the heavy __init__; only sets the attributes
+    the /exit branch touches.
+    """
+    from cli import HermesCLI
+    cli = HermesCLI.__new__(HermesCLI)
+    cli.config = {}
+    cli.console = MagicMock()
+    cli.agent = None
+    cli.conversation_history = []
+    cli.session_id = "test-session"
+    cli._delete_session_on_exit = False
+    return cli
+
+
+class TestExitDeleteFlag:
+    def test_plain_exit_does_not_arm_delete(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit")
+        assert result is False
+        assert cli._delete_session_on_exit is False
+
+    def test_plain_quit_does_not_arm_delete(self):
+        cli = _make_cli()
+        result = cli.process_command("/quit")
+        assert result is False
+        assert cli._delete_session_on_exit is False
+
+    def test_exit_delete_arms_flag(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit --delete")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_quit_delete_arms_flag(self):
+        cli = _make_cli()
+        result = cli.process_command("/quit --delete")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_exit_delete_short_form(self):
+        """`-d` is a convenience alias for `--delete`."""
+        cli = _make_cli()
+        result = cli.process_command("/exit -d")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_quit_alias_q_is_not_quit(self):
+        """`/q` is the alias for `/queue`, not `/quit`. This test documents
+        that /q --delete does NOT arm session deletion — it would dispatch
+        to /queue instead."""
+        cli = _make_cli()
+        cli._pending_input = __import__("queue").Queue()
+        # /q with no args shows a usage error and keeps the CLI running.
+        result = cli.process_command("/q")
+        assert result is not False  # queue command doesn't exit
+        assert cli._delete_session_on_exit is False
+
+    def test_delete_flag_is_case_insensitive(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit --DELETE")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_delete_flag_trims_whitespace(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit   --delete   ")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_unknown_exit_argument_does_not_exit(self):
+        """Unrecognised args should NOT exit the CLI — they surface an
+        error message and stay in the session. This prevents accidental
+        session destruction from typos like `/exit -delete`."""
+        cli = _make_cli()
+        result = cli.process_command("/exit --delte")
+        # process_command returns True = keep running
+        assert result is True
+        assert cli._delete_session_on_exit is False
+
+    def test_unknown_exit_argument_prints_help(self):
+        cli = _make_cli()
+        # _cprint goes through module-level print, so capture via console.
+        # We can't patch _cprint directly without import juggling; the
+        # previous assertion already proves the unknown-arg branch is
+        # reached (result True + flag False).
+        result = cli.process_command("/exit garbage")
+        assert result is True
+        assert cli._delete_session_on_exit is False
+
+
+class TestCommandRegistry:
+    def test_quit_command_advertises_delete_flag(self):
+        """The CommandDef args_hint should surface `--delete` in /help and
+        CLI autocomplete."""
+        from hermes_cli.commands import resolve_command
+        cmd = resolve_command("quit")
+        assert cmd is not None
+        assert cmd.args_hint == "[--delete]"
+
+    def test_exit_alias_resolves_to_quit_with_hint(self):
+        from hermes_cli.commands import resolve_command
+        cmd = resolve_command("exit")
+        assert cmd is not None
+        assert cmd.name == "quit"
+        assert cmd.args_hint == "[--delete]"
diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md
index 377c31c4477..05424c1cd18 100644
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -98,7 +98,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 
 | Command | Description |
 |---------|-------------|
-| `/quit` | Exit the CLI (also: `/exit`). |
+| `/quit` | Exit the CLI (also: `/exit`). See note on `/q` under `/queue` above. Pass `--delete` (or `-d`) — e.g. `/exit --delete` — to also permanently remove the current session's SQLite history and on-disk transcripts before exiting. Useful for privacy-sensitive or one-off tasks. |
 
 ### Dynamic CLI slash commands
 

From dc3d0fe1489aebd5747fa620d9b2eec751a92a55 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 12:55:23 -0700
Subject: [PATCH 141/218] Port from cline/cline#10343: periodic gateway memory
 logging (#27102)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Emit a grep-friendly '[MEMORY] rss=...MB ...' line in agent.log /
gateway.log every N minutes (default 5) so slow leaks in the long-lived
gateway process show up as a time series. Based on
https://github.com/cline/cline/pull/10343
(src/standalone/memory-monitor.ts).

- gateway/memory_monitor.py: new module. Daemon thread, baseline on
  start, final snapshot on stop. Uses resource.getrusage() (stdlib)
  first, falls back to psutil, disables itself with one WARNING if
  neither is available.
- gateway/run.py: start monitor right after setup_logging() in
  start_gateway(); stop it in the shutdown block next to MCP teardown.
- hermes_cli/config.py: logging.memory_monitor { enabled, interval_seconds }
  defaults under the existing logging section.
- tests/gateway/test_memory_monitor.py: 10 unit tests covering format,
  baseline/shutdown snapshots, double-start noop, periodic timer,
  daemon thread invariant, and unavailable-RSS warn-and-skip path.

Adapted from TypeScript/Node to Python (threading.Event-based daemon
thread instead of setInterval/unref), added Python-specific gc + thread
counts to the log line (handier than ext/arrayBuffers for diagnosing
Python gateway leaks), and gated behind a config.yaml toggle so users
can silence the periodic line if they want.

No heap-snapshot-on-OOM equivalent — CPython doesn't have V8's
--heapsnapshot-near-heap-limit; tracemalloc would be the Python
equivalent but adds non-trivial overhead, so leaving that out.
---
 gateway/memory_monitor.py            | 230 +++++++++++++++++++++++++++
 gateway/run.py                       |  37 +++++
 hermes_cli/config.py                 |   9 ++
 tests/gateway/test_memory_monitor.py | 122 ++++++++++++++
 4 files changed, 398 insertions(+)
 create mode 100644 gateway/memory_monitor.py
 create mode 100644 tests/gateway/test_memory_monitor.py

diff --git a/gateway/memory_monitor.py b/gateway/memory_monitor.py
new file mode 100644
index 00000000000..bacbbba34ef
--- /dev/null
+++ b/gateway/memory_monitor.py
@@ -0,0 +1,230 @@
+"""Periodic process memory usage logging for the gateway.
+
+Ported from cline/cline#10343 (src/standalone/memory-monitor.ts).
+
+The gateway is a long-lived process that accumulates memory as it caches
+agent instances, session transcripts, tool schemas, memory providers, MCP
+connections, etc.  A slow leak in any of those subsystems is invisible
+in a single log line — you only see it by watching RSS climb over hours.
+
+This module emits a single structured ``[MEMORY] ...`` line every N
+minutes (default 5) so maintainers investigating a suspected leak can
+grep ``agent.log`` / ``gateway.log`` for a time series of RSS + Python
+GC stats.  The timer runs in a background thread and shuts down cleanly
+with the gateway.
+
+Design notes (parity with the Cline port):
+  * Grep-friendly single-line format beginning ``[MEMORY]``.
+  * Final snapshot logged on shutdown so "last RSS before exit" is
+    always in the log.
+  * Baseline snapshot logged immediately on start.
+  * Daemon thread — never blocks process exit.
+  * Uses ``resource`` (stdlib, Linux/macOS) first and falls back to
+    ``psutil`` when ``resource`` isn't available (Windows).  Both are
+    optional; when neither works we emit a single WARNING and disable
+    the monitor rather than crashing the gateway.
+
+Config: ``logging.memory_monitor`` in ``config.yaml`` — see
+``hermes_cli/config.py`` for the defaults block.
+"""
+
+from __future__ import annotations
+
+import gc
+import logging
+import os
+import sys
+import threading
+import time
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+_BYTES_TO_MB = 1024 * 1024
+
+_monitor_thread: Optional[threading.Thread] = None
+_stop_event: Optional[threading.Event] = None
+_start_time: Optional[float] = None
+_interval_seconds: float = 300.0  # 5 minutes
+_lock = threading.Lock()
+
+
+def _get_rss_mb() -> Optional[int]:
+    """Return current process resident set size in MB, or None if unavailable.
+
+    Tries ``resource.getrusage`` first (Linux/macOS, no extra deps), then
+    falls back to ``psutil`` which is an optional hermes-agent dep.
+    """
+    # Linux / macOS — resource is stdlib.  On Linux ru_maxrss is in KB,
+    # on macOS it is in bytes (yes, really).  We use it as a cheap
+    # "current" RSS — ru_maxrss reports the high-water mark for the
+    # process, which is what you actually want for leak detection.
+    try:
+        import resource
+
+        maxrss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+        if sys.platform == "darwin":
+            return int(maxrss / _BYTES_TO_MB)
+        # Linux / other unices: KB
+        return int(maxrss / 1024)
+    except Exception:
+        pass
+
+    # Fallback: psutil (Windows, or unusual unix without resource).
+    try:
+        import psutil  # type: ignore
+
+        rss = psutil.Process(os.getpid()).memory_info().rss
+        return int(rss / _BYTES_TO_MB)
+    except Exception:
+        return None
+
+
+def log_memory_usage(prefix: str = "") -> None:
+    """Log current memory usage in a grep-friendly ``[MEMORY] ...`` line.
+
+    Safe to call on-demand from any thread at important lifecycle
+    moments (after shutdown, after context compression, etc.).
+
+    Parameters
+    ----------
+    prefix
+        Optional extra tag inserted after ``[MEMORY]`` — e.g.
+        ``"baseline"``, ``"shutdown"``.
+    """
+    rss = _get_rss_mb()
+    uptime = int(time.monotonic() - _start_time) if _start_time else 0
+    # gc.get_stats() returns per-generation collection counts; the sum
+    # is a cheap proxy for "how much garbage have we created".
+    try:
+        gc_counts = gc.get_count()  # (gen0, gen1, gen2)
+    except Exception:
+        gc_counts = (0, 0, 0)
+    # Thread count is a handy correlate when diagnosing thread leaks.
+    try:
+        thread_count = threading.active_count()
+    except Exception:
+        thread_count = 0
+
+    tag = f"{prefix} " if prefix else ""
+    if rss is None:
+        logger.info(
+            "[MEMORY] %srss=unavailable gc=%s threads=%d uptime=%ds",
+            tag,
+            gc_counts,
+            thread_count,
+            uptime,
+        )
+    else:
+        logger.info(
+            "[MEMORY] %srss=%dMB gc=%s threads=%d uptime=%ds",
+            tag,
+            rss,
+            gc_counts,
+            thread_count,
+            uptime,
+        )
+
+
+def _monitor_loop(stop_event: threading.Event, interval: float) -> None:
+    """Background thread body — log every ``interval`` seconds until stopped."""
+    while not stop_event.wait(interval):
+        try:
+            log_memory_usage()
+        except Exception as e:
+            # Never let the monitor crash the gateway; just log and carry on.
+            logger.debug("Memory monitor iteration failed: %s", e)
+
+
+def start_memory_monitoring(interval_seconds: float = 300.0) -> bool:
+    """Start periodic memory usage logging in a daemon thread.
+
+    Logs immediately to capture a baseline, then every ``interval_seconds``.
+    Safe to call multiple times — subsequent calls are no-ops while the
+    first monitor is still running.
+
+    Parameters
+    ----------
+    interval_seconds
+        How often to log.  Default 300s (5 minutes), matching the
+        upstream cline/cline implementation.
+
+    Returns
+    -------
+    bool
+        True if a fresh monitor thread was started, False if one was
+        already running or if memory introspection isn't available.
+    """
+    global _monitor_thread, _stop_event, _start_time, _interval_seconds
+
+    with _lock:
+        if _monitor_thread is not None and _monitor_thread.is_alive():
+            return False
+
+        # Sanity-check that we can read RSS at all.  If neither resource
+        # nor psutil works, no point spinning a thread that can only log
+        # "rss=unavailable" forever — warn once and bail.
+        if _get_rss_mb() is None:
+            logger.warning(
+                "[MEMORY] Memory monitoring unavailable: neither resource.getrusage "
+                "nor psutil could read process RSS — skipping periodic logging.",
+            )
+            return False
+
+        _start_time = time.monotonic()
+        _interval_seconds = float(interval_seconds)
+        _stop_event = threading.Event()
+
+        # Baseline snapshot before the loop starts.
+        log_memory_usage(prefix="baseline")
+
+        _monitor_thread = threading.Thread(
+            target=_monitor_loop,
+            args=(_stop_event, _interval_seconds),
+            name="gateway-memory-monitor",
+            daemon=True,
+        )
+        _monitor_thread.start()
+
+        logger.info(
+            "[MEMORY] Periodic memory monitoring started (interval: %ds)",
+            int(_interval_seconds),
+        )
+        return True
+
+
+def stop_memory_monitoring(timeout: float = 2.0) -> None:
+    """Stop the monitor thread and log a final snapshot.
+
+    Safe to call even if ``start_memory_monitoring()`` was never called.
+    """
+    global _monitor_thread, _stop_event
+
+    with _lock:
+        if _stop_event is None or _monitor_thread is None:
+            return
+
+        # Final snapshot before teardown so "last RSS" is always in the log.
+        try:
+            log_memory_usage(prefix="shutdown")
+        except Exception:
+            pass
+
+        _stop_event.set()
+        thread = _monitor_thread
+        _monitor_thread = None
+        _stop_event = None
+
+    # Join outside the lock so a stuck log call can't deadlock shutdown.
+    try:
+        thread.join(timeout=timeout)
+    except Exception:
+        pass
+
+    logger.info("[MEMORY] Periodic memory monitoring stopped")
+
+
+def is_running() -> bool:
+    """True if the background monitor thread is alive."""
+    with _lock:
+        return _monitor_thread is not None and _monitor_thread.is_alive()
diff --git a/gateway/run.py b/gateway/run.py
index f9a282a413f..a5eaafcb063 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -16800,6 +16800,33 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
     from hermes_logging import setup_logging
     setup_logging(hermes_home=_hermes_home, mode="gateway")
 
+    # Periodic process memory usage logging (gateway only) — emits a
+    # grep-friendly "[MEMORY] rss=...MB ..." line every N minutes so
+    # slow leaks in the long-lived gateway process show up as a time
+    # series in agent.log / gateway.log.  Ported from cline/cline#10343.
+    # Controlled by the logging.memory_monitor section in config.yaml.
+    try:
+        from gateway import memory_monitor as _memory_monitor
+
+        _mm_cfg = {}
+        try:
+            # config is loaded a few lines up; re-read the logging section
+            # here so we pick up user overrides without coupling to local
+            # variable names inside the start_gateway body.
+            from hermes_cli.config import load_config as _load_cli_config
+
+            _mm_cfg = (_load_cli_config() or {}).get("logging", {}).get("memory_monitor", {}) or {}
+        except Exception:
+            _mm_cfg = {}
+        if _mm_cfg.get("enabled", True):
+            try:
+                _mm_interval = float(_mm_cfg.get("interval_seconds", 300))
+            except (TypeError, ValueError):
+                _mm_interval = 300.0
+            _memory_monitor.start_memory_monitoring(interval_seconds=_mm_interval)
+    except Exception as _mm_exc:
+        logger.debug("Failed to start memory monitor: %s", _mm_exc)
+
     # Optional stderr handler — level driven by -v/-q flags on the CLI.
     # verbosity=None (-q/--quiet): no stderr output
     # verbosity=0    (default):    WARNING and above
@@ -17016,6 +17043,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
     except Exception:
         pass
 
+    # Stop the periodic memory monitor (if it was started above).
+    # This also emits one final "[MEMORY] shutdown rss=..." line so the
+    # last RSS reading before gateway exit is always in the log.
+    try:
+        from gateway import memory_monitor as _memory_monitor
+
+        _memory_monitor.stop_memory_monitoring(timeout=2.0)
+    except Exception:
+        pass
+
     if runner.exit_code is not None:
         raise SystemExit(runner.exit_code)
 
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 574f2397d91..81706d1edb4 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1477,6 +1477,15 @@ DEFAULT_CONFIG = {
         "level": "INFO",       # Minimum level for agent.log: DEBUG, INFO, WARNING
         "max_size_mb": 5,      # Max size per log file before rotation
         "backup_count": 3,     # Number of rotated backup files to keep
+        # Periodic process memory usage logging (gateway only). Emits a
+        # grep-friendly "[MEMORY] rss=...MB ..." line at the configured
+        # interval so slow leaks in the long-lived gateway are visible
+        # in agent.log / gateway.log as a time series. Ported from
+        # cline/cline#10343.
+        "memory_monitor": {
+            "enabled": True,         # Flip to false to silence the periodic line
+            "interval_seconds": 300, # Default: every 5 minutes
+        },
     },
 
     # Remotely-hosted model catalog manifest.  When enabled, the CLI fetches
diff --git a/tests/gateway/test_memory_monitor.py b/tests/gateway/test_memory_monitor.py
new file mode 100644
index 00000000000..64903dc81f8
--- /dev/null
+++ b/tests/gateway/test_memory_monitor.py
@@ -0,0 +1,122 @@
+"""Tests for gateway.memory_monitor — periodic process memory logging.
+
+Ported from cline/cline#10343.  The module logs a structured
+``[MEMORY] rss=...MB ...`` line periodically so long-running gateway
+leaks show up as a time series in agent.log / gateway.log.
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+
+import pytest
+
+from gateway import memory_monitor as mm
+
+
+@pytest.fixture(autouse=True)
+def _ensure_monitor_stopped():
+    """Every test starts from a clean state and leaves one behind."""
+    mm.stop_memory_monitoring(timeout=1.0)
+    yield
+    mm.stop_memory_monitoring(timeout=1.0)
+
+
+def test_log_memory_usage_emits_memory_line(caplog):
+    caplog.set_level(logging.INFO, logger="gateway.memory_monitor")
+    mm.log_memory_usage()
+    memory_lines = [r for r in caplog.records if "[MEMORY]" in r.getMessage()]
+    assert memory_lines, "expected at least one [MEMORY] log record"
+
+
+def test_log_memory_usage_has_grep_friendly_format(caplog):
+    caplog.set_level(logging.INFO, logger="gateway.memory_monitor")
+    mm.log_memory_usage()
+    msg = caplog.records[-1].getMessage()
+    # Grep-friendly contract: line starts with [MEMORY] and carries RSS
+    # (or 'unavailable'), GC counts, thread count, uptime.
+    assert msg.startswith("[MEMORY]"), msg
+    assert "rss=" in msg
+    assert "gc=" in msg
+    assert "threads=" in msg
+    assert "uptime=" in msg
+
+
+def test_log_memory_usage_with_prefix(caplog):
+    caplog.set_level(logging.INFO, logger="gateway.memory_monitor")
+    mm.log_memory_usage(prefix="baseline")
+    msg = caplog.records[-1].getMessage()
+    assert "[MEMORY] baseline " in msg
+
+
+def test_start_logs_baseline_and_returns_true(caplog):
+    caplog.set_level(logging.INFO, logger="gateway.memory_monitor")
+    # Large interval so the background timer never fires during the test —
+    # we're only checking the synchronous baseline behavior here.
+    started = mm.start_memory_monitoring(interval_seconds=3600.0)
+    assert started is True
+    assert mm.is_running() is True
+
+    messages = [r.getMessage() for r in caplog.records]
+    assert any("[MEMORY] baseline " in m for m in messages), messages
+    assert any("Periodic memory monitoring started" in m for m in messages), messages
+
+
+def test_double_start_is_noop():
+    assert mm.start_memory_monitoring(interval_seconds=3600.0) is True
+    assert mm.start_memory_monitoring(interval_seconds=3600.0) is False
+    assert mm.is_running() is True
+
+
+def test_stop_logs_shutdown_snapshot(caplog):
+    mm.start_memory_monitoring(interval_seconds=3600.0)
+    caplog.clear()
+    caplog.set_level(logging.INFO, logger="gateway.memory_monitor")
+    mm.stop_memory_monitoring(timeout=1.0)
+    assert mm.is_running() is False
+
+    messages = [r.getMessage() for r in caplog.records]
+    assert any("[MEMORY] shutdown " in m for m in messages), messages
+    assert any("Periodic memory monitoring stopped" in m for m in messages), messages
+
+
+def test_stop_without_start_is_noop():
+    # Must not raise, must not log shutdown snapshot.
+    mm.stop_memory_monitoring(timeout=0.5)
+    assert mm.is_running() is False
+
+
+def test_periodic_timer_fires(caplog):
+    caplog.set_level(logging.INFO, logger="gateway.memory_monitor")
+    # Short interval so we can observe multiple ticks inside the test budget.
+    mm.start_memory_monitoring(interval_seconds=0.1)
+    time.sleep(0.45)
+    mm.stop_memory_monitoring(timeout=1.0)
+
+    periodic = [
+        r for r in caplog.records
+        if r.getMessage().startswith("[MEMORY] rss=") or r.getMessage().startswith("[MEMORY] rss=unavailable")
+    ]
+    # baseline + at least 2 periodic + shutdown — but shutdown has the
+    # "shutdown " prefix so it won't match the strict "[MEMORY] rss=" start.
+    # We expect >= 3 bare "[MEMORY] rss=..." lines.
+    assert len(periodic) >= 3, [r.getMessage() for r in caplog.records]
+
+
+def test_thread_is_daemon():
+    mm.start_memory_monitoring(interval_seconds=3600.0)
+    assert mm._monitor_thread is not None
+    assert mm._monitor_thread.daemon is True, (
+        "memory monitor thread must be daemon so it can never block process exit"
+    )
+
+
+def test_unavailable_rss_warns_and_does_not_start(caplog, monkeypatch):
+    # Force both backends to claim unavailable; start should bail.
+    monkeypatch.setattr(mm, "_get_rss_mb", lambda: None)
+    caplog.set_level(logging.WARNING, logger="gateway.memory_monitor")
+    started = mm.start_memory_monitoring(interval_seconds=3600.0)
+    assert started is False
+    assert mm.is_running() is False
+    assert any("Memory monitoring unavailable" in r.getMessage() for r in caplog.records)

From 93e109a1d552b03c847b96077428048cceb012cd Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 13:02:19 -0700
Subject: [PATCH 142/218] fix(moonshot): strip $ref siblings and collapse tuple
 items in tool schemas (#27104)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Port from anomalyco/opencode#24730: Moonshot's JSON Schema validator rejects
two shapes that the rest of the JSON Schema ecosystem accepts:

1. $ref nodes with sibling keywords. Moonshot expands the reference before
   validation and then rejects the node if keys like `description`, `type`,
   or `default` appear alongside $ref. MCP-sourced tool schemas commonly
   put a `description` on $ref-typed properties so the model sees the
   field hint — which worked on every provider except Moonshot.

2. Tuple-style `items` arrays (positional element schemas). Moonshot's
   engine requires ONE schema applied to every array element. Common in
   tool schemas generated from Go/Protobuf that model fixed-length arrays
   as `[{type:number}, {type:number}]`.

Repairs applied in `agent/moonshot_schema.py`:

- Rule 3: when a node has `$ref`, return `{"$ref": <value>}` only
  (strip every sibling). The referenced definition still carries its own
  description on the target node, which Moonshot accepts.
- Rule 4: when `items` is a list, collapse to the first element schema
  (falling back to `{}` which is then filled by the generic missing-type
  rule). Preserves `minItems` / `maxItems` / other siblings.

Tests: 10 new cases across TestRefSiblingStripping + TestTupleItems,
plus the existing TestMissingTypeFilled::test_ref_node_is_not_given_synthetic_type
still passes (it asserted plain $ref passes through; now it passes through
as exactly `{"$ref": "..."}` which is strictly compatible).

All 35 tests in test_moonshot_schema.py pass.
---
 agent/moonshot_schema.py            |  31 ++++++
 tests/agent/test_moonshot_schema.py | 163 ++++++++++++++++++++++++++++
 2 files changed, 194 insertions(+)

diff --git a/agent/moonshot_schema.py b/agent/moonshot_schema.py
index f22176f936e..6f785af5469 100644
--- a/agent/moonshot_schema.py
+++ b/agent/moonshot_schema.py
@@ -15,6 +15,18 @@ and MoonshotAI/kimi-cli#1595:
 2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
    the parent.  Presence of both causes "type should be defined in anyOf
    items instead of the parent schema".
+3. ``enum`` arrays on scalar-typed nodes may not contain ``null`` or empty
+   strings.  Strip those entries (drop the enum entirely if it becomes empty).
+4. ``$ref`` nodes may not carry sibling keywords.  Moonshot expands the
+   reference before validation and then rejects the node if sibling keys
+   like ``description`` remain on the same node as ``$ref``.  Strip every
+   sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives.
+   (Ported from anomalyco/opencode#24730.)
+5. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]``
+   for positional element schemas).  Moonshot's schema engine requires a
+   single object schema applied to every array element.  Collapse tuple
+   ``items`` to the first element schema (or ``{}`` if the tuple is empty).
+   (Ported from anomalyco/opencode#24730.)
 
 The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
 handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
@@ -66,6 +78,16 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
             }
         elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
             repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
+        elif key == "items" and isinstance(value, list):
+            # Rule 5: tuple-style ``items`` arrays (positional element
+            # schemas) are not accepted by Moonshot.  Collapse to the
+            # first element schema if present, else to ``{}``.  This
+            # matches opencode's behaviour for moonshotai / kimi models.
+            first = value[0] if value else {}
+            if isinstance(first, dict):
+                repaired[key] = _repair_schema(first, is_schema=True)
+            else:
+                repaired[key] = first
         elif key in _SCHEMA_NODE_KEYS:
             # items / not / additionalProperties: single nested schema.
             # additionalProperties can also be a bool — leave those alone.
@@ -130,6 +152,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
             else:
                 repaired.pop("enum")
 
+    # Rule 4: $ref nodes must not have sibling keywords.  Moonshot expands
+    # the reference before validation and then rejects the node if siblings
+    # like ``description`` / ``type`` / ``default`` appear alongside $ref.
+    # The referenced definition still carries its own description on the
+    # target node, which Moonshot accepts.
+    # (Ported from anomalyco/opencode#24730.)
+    if "$ref" in repaired:
+        return {"$ref": repaired["$ref"]}
+
     return repaired
 
 
diff --git a/tests/agent/test_moonshot_schema.py b/tests/agent/test_moonshot_schema.py
index 2ce2daa096a..8ba508c5dbd 100644
--- a/tests/agent/test_moonshot_schema.py
+++ b/tests/agent/test_moonshot_schema.py
@@ -6,6 +6,11 @@ the JSON Schema ecosystem accepts:
 1. Properties without ``type`` — Moonshot requires ``type`` on every node.
 2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
    ``anyOf`` children.
+3. ``$ref`` with sibling keywords — Moonshot expands the ref first and then
+   rejects ``description``/``type`` siblings on the same node.
+   (Ported from anomalyco/opencode#24730.)
+4. Tuple-style ``items`` arrays — Moonshot requires a single item schema,
+   not positional ones. (Ported from anomalyco/opencode#24730.)
 
 These tests cover the repairs applied by ``agent/moonshot_schema.py``.
 """
@@ -180,6 +185,164 @@ class TestAnyOfParentType:
         assert db_type["enum"] == ["mysql", "postgresql"]  # "" stripped by enum cleanup
 
 
+class TestRefSiblingStripping:
+    """Rule 4: ``$ref`` nodes may not carry sibling keywords on Moonshot.
+
+    Ported from anomalyco/opencode#24730.  The real-world failure was MCP tools
+    whose generated schemas put a ``description`` on a ``$ref`` property so the
+    model would see the field's human-readable hint.  The reference stays — the
+    referenced definition still owns the description (on the target node itself)
+    and still serves the model's context.
+    """
+
+    def test_description_sibling_stripped_from_ref(self):
+        params = {
+            "type": "object",
+            "properties": {
+                "variantOptions": {
+                    "$ref": "#/$defs/VariantOptions",
+                    "description": "Required. The variant options for generation.",
+                },
+            },
+            "$defs": {
+                "VariantOptions": {
+                    "type": "object",
+                    "properties": {},
+                    "description": "Configuration options.",
+                },
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        # Sibling stripped.
+        assert out["properties"]["variantOptions"] == {"$ref": "#/$defs/VariantOptions"}
+        # The target definition's own description is preserved — we only strip
+        # siblings ON the $ref node, not on the thing it points at.
+        assert out["$defs"]["VariantOptions"]["description"] == "Configuration options."
+
+    def test_multiple_siblings_all_stripped(self):
+        params = {
+            "type": "object",
+            "properties": {
+                "p": {
+                    "$ref": "#/$defs/T",
+                    "type": "object",
+                    "description": "x",
+                    "default": {},
+                    "title": "P",
+                },
+            },
+            "$defs": {"T": {"type": "object"}},
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["p"] == {"$ref": "#/$defs/T"}
+
+    def test_ref_without_siblings_unchanged(self):
+        params = {
+            "type": "object",
+            "properties": {"p": {"$ref": "#/$defs/T"}},
+            "$defs": {"T": {"type": "object"}},
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["p"] == {"$ref": "#/$defs/T"}
+
+    def test_ref_inside_anyof_children(self):
+        params = {
+            "type": "object",
+            "properties": {
+                "v": {
+                    "anyOf": [
+                        {"$ref": "#/$defs/A", "description": "variant A"},
+                        {"type": "null"},
+                    ],
+                },
+            },
+            "$defs": {"A": {"type": "object"}},
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        # Main's existing Rule 2 collapses anyOf-with-null down to the
+        # single non-null branch (Moonshot rejects null branches in anyOf
+        # outright).  That branch was originally `{"$ref": ..., "description": ...}`;
+        # Rule 4 then strips the sibling, leaving exactly `{"$ref": "..."}`.
+        # The test name still applies — Rule 4 ran on the $ref branch — it
+        # just happens after the anyOf collapse on this input.
+        assert out["properties"]["v"] == {"$ref": "#/$defs/A"}
+
+
+class TestTupleItems:
+    """Rule 5: tuple-style ``items`` arrays collapse to a single schema.
+
+    Ported from anomalyco/opencode#24730.  Moonshot's schema engine requires
+    ``items`` to be ONE schema object applied to every array element; tuple-
+    style positional item schemas are rejected.  We collapse to the first
+    element's schema (which is the "closest" interpretation of positional →
+    single) and drop the rest.
+    """
+
+    def test_tuple_items_collapsed_to_first(self):
+        params = {
+            "type": "object",
+            "properties": {
+                "renderedSize": {
+                    "type": "array",
+                    "items": [{"type": "number"}, {"type": "number"}],
+                    "minItems": 2,
+                    "maxItems": 2,
+                },
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["renderedSize"]["items"] == {"type": "number"}
+        # Sibling constraints are preserved — only the tuple shape is repaired.
+        assert out["properties"]["renderedSize"]["minItems"] == 2
+
+    def test_empty_tuple_items_becomes_empty_schema(self):
+        # Empty tuple collapses to ``{}``; the generic repair then fills a
+        # synthetic ``type`` because Moonshot requires ``type`` on every
+        # schema node.  Either ``{}`` or ``{"type": "string"}`` is a valid
+        # final shape for Moonshot — both accept any string element — but we
+        # always go through ``_fill_missing_type`` so the result is fully
+        # well-formed without needing the consumer to patch it later.
+        params = {
+            "type": "object",
+            "properties": {
+                "things": {"type": "array", "items": []},
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        items = out["properties"]["things"]["items"]
+        # Must be a dict and must carry a ``type`` (the whole point of Rule 1).
+        assert isinstance(items, dict)
+        assert items.get("type")
+
+    def test_tuple_items_first_element_is_repaired(self):
+        # The first element itself has a missing type — it should be filled.
+        params = {
+            "type": "object",
+            "properties": {
+                "pair": {
+                    "type": "array",
+                    "items": [{"description": "first"}, {"description": "second"}],
+                },
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        # Repaired to a single schema with a synthetic type.
+        assert out["properties"]["pair"]["items"] == {
+            "description": "first",
+            "type": "string",
+        }
+
+    def test_single_schema_items_unchanged(self):
+        params = {
+            "type": "object",
+            "properties": {
+                "tags": {"type": "array", "items": {"type": "string"}},
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["tags"]["items"] == {"type": "string"}
+
+
 class TestTopLevelGuarantees:
     """The returned top-level schema is always a well-formed object."""
 

From fb05f5d4b58d4fb20c3a4a98c2c150de3f729f3c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 13:06:56 -0700
Subject: [PATCH 143/218] fix(mcp): validate remote URLs up-front with a clear
 error (#27105)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Port from anomalyco/opencode#25019 ("fix: handle invalid mcp urls").

Previously: a typo in `config.yaml` (missing scheme, wrong scheme,
empty string, non-string value) slipped past `_is_http()` and hit
`httpx.URL(url)` or `streamablehttp_client(url, ...)` deep in the
transport layer. That raised a generic exception which went through
the reconnect-backoff loop, so a bad URL caused _MAX_INITIAL_CONNECT_RETRIES
attempts with doubling backoff — about a minute of pointless retries
plus an opaque error — before the server was marked failed.

Now: we validate the URL once, at the top of `run()`, before
entering the retry loop. A malformed URL raises `InvalidMcpUrlError`
(a `ValueError` subclass) with a message that names the offending
server and explains exactly what was wrong. `_ready` is set and
`_error` is populated, so `start()` re-raises and the server shows
up as failed in `hermes mcp list` without any backoff burn.

Validation rules:
- Must be a string (rejects None, dict, int)
- Must be non-empty (rejects '' and whitespace-only)
- Scheme must be http or https (rejects file://, ws://, stdio://)
- Must have a non-empty host (rejects http:///, http://:8080)

Tests (21 new cases in tests/tools/test_mcp_invalid_url.py):
- TestValidUrlsAccepted: http, https, IPv6, ports, paths, query strings
- TestInvalidUrlsRejected: every rejection path above + clear error text
- TestErrorIsValueError: downstream code catching ValueError still works

E2E verified: a misconfigured server with `url: not-a-valid-url`
now fails in <0.001s with the clear error, instead of minutes of retries.

Doesn't touch stdio servers (they use `command`, not `url`) — the
validator only fires when `_is_http()` returns True.
---
 tests/tools/test_mcp_invalid_url.py | 125 ++++++++++++++++++++++++++++
 tools/mcp_tool.py                   |  82 ++++++++++++++++++
 2 files changed, 207 insertions(+)
 create mode 100644 tests/tools/test_mcp_invalid_url.py

diff --git a/tests/tools/test_mcp_invalid_url.py b/tests/tools/test_mcp_invalid_url.py
new file mode 100644
index 00000000000..539696292ad
--- /dev/null
+++ b/tests/tools/test_mcp_invalid_url.py
@@ -0,0 +1,125 @@
+"""Tests for the MCP remote-URL validator.
+
+Ported from anomalyco/opencode#25019 (``fix: handle invalid mcp urls``).
+
+Previously, a typo in ``config.yaml`` (missing scheme, wrong scheme, empty
+string, dict where a URL was expected) caused the MCP server startup code
+to enter httpx's URL-parsing path and crash inside the transport layer.
+The reconnect-backoff loop would then retry
+``_MAX_INITIAL_CONNECT_RETRIES`` times with doubling backoff — a minute or
+more of pointless retries plus a confusing opaque error message — before
+eventually giving up.
+
+The fix validates the URL once, up front, and fails fast with a specific
+error message identifying the offending server.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from tools.mcp_tool import (
+    InvalidMcpUrlError,
+    _validate_remote_mcp_url,
+)
+
+
+class TestValidUrlsAccepted:
+    """Every valid http(s) URL must pass through untouched (stripped of whitespace)."""
+
+    @pytest.mark.parametrize(
+        "url",
+        [
+            "http://localhost:3000/mcp",
+            "https://example.com/mcp",
+            "https://context7.liam.com/mcp",
+            "http://127.0.0.1:8080",
+            "https://api.example.com:443/v1/mcp?session=abc",
+            "http://[::1]:9000/mcp",  # IPv6
+            "https://host.example.com",  # no port, no path
+        ],
+    )
+    def test_accepts_valid_http_url(self, url):
+        assert _validate_remote_mcp_url("test", url) == url
+
+    def test_strips_surrounding_whitespace(self):
+        assert (
+            _validate_remote_mcp_url("test", "  https://example.com/mcp  ")
+            == "https://example.com/mcp"
+        )
+
+
+class TestInvalidUrlsRejected:
+    """Every broken shape must raise ``InvalidMcpUrlError`` with a clear message."""
+
+    def test_none_rejected(self):
+        with pytest.raises(InvalidMcpUrlError, match="context7.*expected a string"):
+            _validate_remote_mcp_url("context7", None)
+
+    def test_dict_rejected(self):
+        with pytest.raises(InvalidMcpUrlError, match="expected a string, got dict"):
+            _validate_remote_mcp_url("ctx", {"url": "nested"})
+
+    def test_int_rejected(self):
+        with pytest.raises(InvalidMcpUrlError, match="expected a string, got int"):
+            _validate_remote_mcp_url("ctx", 8080)
+
+    def test_empty_string_rejected(self):
+        with pytest.raises(InvalidMcpUrlError, match="empty url"):
+            _validate_remote_mcp_url("ctx", "")
+
+    def test_whitespace_only_rejected(self):
+        with pytest.raises(InvalidMcpUrlError, match="empty url"):
+            _validate_remote_mcp_url("ctx", "   \t\n")
+
+    def test_missing_scheme_rejected(self):
+        # The most common typo — users copy a host from a web page.
+        with pytest.raises(
+            InvalidMcpUrlError, match="scheme must be http or https"
+        ):
+            _validate_remote_mcp_url("ctx", "example.com/mcp")
+
+    def test_file_scheme_rejected(self):
+        with pytest.raises(
+            InvalidMcpUrlError, match="scheme must be http or https"
+        ):
+            _validate_remote_mcp_url("ctx", "file:///etc/passwd")
+
+    def test_ws_scheme_rejected(self):
+        # WebSocket is not MCP's remote transport.
+        with pytest.raises(
+            InvalidMcpUrlError, match="scheme must be http or https"
+        ):
+            _validate_remote_mcp_url("ctx", "ws://example.com/mcp")
+
+    def test_stdio_scheme_rejected(self):
+        # stdio servers use the ``command`` key, not ``url``.
+        with pytest.raises(
+            InvalidMcpUrlError, match="scheme must be http or https"
+        ):
+            _validate_remote_mcp_url("ctx", "stdio:///node server.js")
+
+    def test_empty_host_rejected(self):
+        with pytest.raises(InvalidMcpUrlError, match="missing host"):
+            _validate_remote_mcp_url("ctx", "http:///")
+
+    def test_empty_host_with_path_rejected(self):
+        with pytest.raises(InvalidMcpUrlError, match="missing host"):
+            _validate_remote_mcp_url("ctx", "https:///path/only")
+
+    def test_error_mentions_server_name(self):
+        # So users can find the bad entry when there are multiple configured.
+        with pytest.raises(InvalidMcpUrlError, match="my-weird-server"):
+            _validate_remote_mcp_url("my-weird-server", "not a url at all")
+
+
+class TestErrorIsValueError:
+    """InvalidMcpUrlError must be a ValueError for broad downstream catch blocks."""
+
+    def test_is_value_error(self):
+        try:
+            _validate_remote_mcp_url("ctx", "garbage")
+        except ValueError:
+            pass  # expected
+        else:
+            pytest.fail("expected ValueError")
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index b24bb9705ad..a46496ef59c 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -91,6 +91,7 @@ import threading
 import time
 from datetime import datetime
 from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse
 
 logger = logging.getLogger(__name__)
 
@@ -492,6 +493,72 @@ def _cache_mcp_image_block(block) -> str:
     return f"MEDIA:{image_path}"
 
 
+# ---------------------------------------------------------------------------
+# Remote MCP URL validation
+# ---------------------------------------------------------------------------
+
+
+class InvalidMcpUrlError(ValueError):
+    """Raised when a remote MCP server's ``url`` cannot be parsed as http(s)://.
+
+    Validated once at startup so we fail fast with a clear message instead of
+    burning through the reconnect-backoff loop on every attempt.  (Ported from
+    anomalyco/opencode#25019.)
+    """
+
+
+def _validate_remote_mcp_url(server_name: str, url: Any) -> str:
+    """Return the URL as a string if it's a valid http(s) remote MCP URL.
+
+    Raises :class:`InvalidMcpUrlError` otherwise with a message naming the
+    offending server, so users can spot the bad entry in their config.
+
+    Accepts:
+    - ``http://host`` / ``https://host`` with optional port, path, query
+    - IPv4, IPv6 (bracketed), DNS hostnames
+
+    Rejects:
+    - Non-string values (``None``, dicts, ints)
+    - Missing scheme (``example.com/mcp``)
+    - Non-http(s) schemes (``file://``, ``ws://``, ``stdio:`` — stdio servers
+      use the ``command`` key, not ``url``)
+    - Empty host (``http://``, ``https:///path``)
+    """
+    if not isinstance(url, str):
+        raise InvalidMcpUrlError(
+            f"Invalid MCP URL for '{server_name}': expected a string, got "
+            f"{type(url).__name__}"
+        )
+    stripped = url.strip()
+    if not stripped:
+        raise InvalidMcpUrlError(
+            f"Invalid MCP URL for '{server_name}': empty url"
+        )
+    try:
+        parsed = urlparse(stripped)
+    except Exception as exc:  # urlparse is very permissive — belt and braces
+        raise InvalidMcpUrlError(
+            f"Invalid MCP URL for '{server_name}': {stripped!r} ({exc})"
+        ) from exc
+    if parsed.scheme.lower() not in ("http", "https"):
+        raise InvalidMcpUrlError(
+            f"Invalid MCP URL for '{server_name}': scheme must be http or "
+            f"https, got {parsed.scheme!r} ({stripped!r})"
+        )
+    if not parsed.netloc:
+        raise InvalidMcpUrlError(
+            f"Invalid MCP URL for '{server_name}': missing host ({stripped!r})"
+        )
+    # ``urlparse`` accepts ``http://:8080`` (empty host, explicit port).
+    # Reject that — we need a real host.
+    if not parsed.hostname:
+        raise InvalidMcpUrlError(
+            f"Invalid MCP URL for '{server_name}': missing hostname "
+            f"({stripped!r})"
+        )
+    return stripped
+
+
 def _format_connect_error(exc: BaseException) -> str:
     """Render nested MCP connection errors into an actionable short message."""
 
@@ -1458,6 +1525,21 @@ class MCPServerTask:
                 "this warning.",
                 self.name,
             )
+
+        # Validate remote URL once, up front.  Raising here (rather than
+        # letting it blow up inside the SDK's httpx layer on every retry)
+        # means a typo in config.yaml fails fast with a clear error — and
+        # critically, no reconnect-backoff burn.  (Ported from
+        # anomalyco/opencode#25019.)
+        if self._is_http():
+            try:
+                _validate_remote_mcp_url(self.name, config.get("url"))
+            except InvalidMcpUrlError as exc:
+                logger.warning("%s", exc)
+                self._error = exc
+                self._ready.set()
+                return
+
         retries = 0
         initial_retries = 0
         backoff = 1.0

From dffb602f37b3c1b9c9fd7f0417aab3af56cffa38 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 16:00:01 -0700
Subject: [PATCH 144/218] fix(xai): drop stale X Premium+ hint from entitlement
 403 surfacing (#27110)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xAI announced on 2026-05-16 (https://x.ai/news/grok-hermes) that X Premium
subscriptions now work in Hermes Agent. The hint we shipped in PR #26644
asserted the opposite ("X Premium+ does NOT include xAI API access — only
standalone SuperGrok subscribers can use this provider"), which would now
misdirect Premium+ users who hit any other 403 (no Grok sub at all, wrong
tier, exhausted quota) into thinking they need to switch subscriptions
when their sub is in fact valid.

Remove _decorate_xai_entitlement_error and its two call sites in
_summarize_api_error. xAI's own body text already says "Manage subscriptions
at https://grok.com/?_s=usage" — surface that verbatim and let xAI's wording
do the diagnosis.

The _is_entitlement_failure guard (which prevents credential-pool refresh
loops on entitlement 403s) and the reasoning-replay gating for xai-oauth
are unrelated and untouched.

Update tests to assert the body still surfaces verbatim and that no
Hermes-side editorializing is appended.
---
 run_agent.py                                  | 61 +-------------
 .../test_codex_xai_oauth_recovery.py          | 81 +++++--------------
 2 files changed, 24 insertions(+), 118 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 1dd4219b22e..61699607d1f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5046,63 +5046,6 @@ class AIAgent:
             return True
         return False
 
-    @staticmethod
-    def _decorate_xai_entitlement_error(detail: str) -> str:
-        """Append a neutral hint when xAI's OAuth surface returns the
-        permission-denied 403.
-
-        xAI's ``/v1/responses`` endpoint replies to several distinct failure
-        modes with the SAME body::
-
-            {"code": "The caller does not have permission to execute the
-             specified operation", "error": "You have either run out of
-             available resources or do not have an active Grok subscription.
-             Manage subscriptions at https://grok.com/?_s=usage or subscribe
-             at https://grok.com/supergrok"}
-
-        That body covers several real causes we cannot distinguish without
-        more info from xAI.  The most common (and least obvious) one is
-        that **X Premium+ does NOT include API access** — only standalone
-        SuperGrok subscribers can use Hermes against xai-oauth.  Lots of
-        users see Grok in their X app, assume it works here too, and hit
-        this 403 with no idea why.  Lead the hint with that.
-
-        Other possible causes:
-          * No Grok subscription at all
-          * SuperGrok tier doesn't include the requested model (e.g.
-            grok-4.3 may need a higher tier)
-          * Monthly quota exhausted (the ``?_s=usage`` URL hints at this)
-
-        Surface the raw xAI text verbatim and point at
-        https://grok.com/?_s=usage where the user can see WHICH applies.
-
-        Matched once per detail string — won't double-decorate if the
-        upstream already concatenated the same text.
-        """
-        if not detail:
-            return detail
-        lower = detail.lower()
-        is_entitlement = (
-            "do not have an active grok subscription" in lower
-            or ("out of available resources" in lower and "grok" in lower)
-            or ("does not have permission" in lower and "grok" in lower)
-        )
-        if not is_entitlement:
-            return detail
-        hint = (
-            " — xAI rejected this OAuth account. NOTE: X Premium+ does NOT "
-            "include xAI API access — only standalone SuperGrok subscribers "
-            "can use this provider. Other possible causes: no Grok "
-            "subscription, your tier doesn't include this model, or your "
-            "quota is exhausted. Check https://grok.com/?_s=usage to see "
-            "which, or run `/model` to switch providers."
-        )
-        # Idempotency: detect prior decoration by a substring unique to the
-        # hint (not present in xAI's own body text).
-        if "X Premium+ does NOT include" in detail:
-            return detail
-        return f"{detail}{hint}"
-
     @staticmethod
     def _summarize_api_error(error: Exception) -> str:
         """Extract a human-readable one-liner from an API error.
@@ -5142,12 +5085,12 @@ class AIAgent:
             if msg:
                 status_code = getattr(error, "status_code", None)
                 prefix = f"HTTP {status_code}: " if status_code else ""
-                return AIAgent._decorate_xai_entitlement_error(f"{prefix}{msg[:300]}")
+                return f"{prefix}{msg[:300]}"
 
         # Fallback: truncate the raw string but give more room than 200 chars
         status_code = getattr(error, "status_code", None)
         prefix = f"HTTP {status_code}: " if status_code else ""
-        return AIAgent._decorate_xai_entitlement_error(f"{prefix}{raw[:500]}")
+        return f"{prefix}{raw[:500]}"
 
     def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
         if not key:
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
index 9192d50695b..9eb641cc895 100644
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -158,19 +158,22 @@ def test_codex_stream_postlude_error_still_falls_back():
 
 
 # ---------------------------------------------------------------------------
-# Fix B: friendly entitlement message
+# Fix B: surface xAI's entitlement body verbatim (no editorializing)
+#
+# The original PR #26644 appended a hint that led with "X Premium+ does NOT
+# include xAI API access — only standalone SuperGrok subscribers can use this
+# provider."  xAI announced on 2026-05-16 that X Premium subs now work in
+# Hermes (https://x.ai/news/grok-hermes), making that hint actively wrong:
+# a Premium+ user hitting a real entitlement issue (no Grok sub, wrong tier,
+# exhausted quota) would be misdirected to switch subscriptions when their
+# Premium sub is in fact valid.  We now surface xAI's own body text verbatim
+# (which already says "Manage subscriptions at https://grok.com/?_s=usage")
+# and leave the diagnosis to xAI's wording.
 # ---------------------------------------------------------------------------
 
 
-def test_summarize_api_error_decorates_xai_entitlement_403():
-    """xAI's OAuth 403 must surface the X Premium+ gotcha + neutral causes.
-
-    Wording deliberately leads with the X Premium+ gotcha because that's
-    the #1 confusing case: people see Grok in their X app, assume it
-    works here too, and hit this 403 with no idea API access is a
-    separate SKU.  Other causes (no subscription, wrong tier, exhausted
-    quota) follow.
-    """
+def test_summarize_api_error_surfaces_xai_entitlement_body_verbatim():
+    """xAI's OAuth 403 body must surface as-is, with no Hermes-side hint."""
     from run_agent import AIAgent
 
     error = RuntimeError(
@@ -180,45 +183,15 @@ def test_summarize_api_error_decorates_xai_entitlement_403():
         "subscriptions at https://grok.com'}"
     )
     summary = AIAgent._summarize_api_error(error)
-    # The original xAI text must survive — it's still useful diagnostic info.
+    # xAI's own body text must reach the user — they need it to diagnose.
     assert "do not have an active Grok subscription" in summary
-    # The hint MUST lead with the X Premium+ gotcha (most likely cause
-    # for users who think they're subscribed).
-    assert "X Premium+ does NOT include" in summary
-    assert "standalone SuperGrok subscribers" in summary
-    # Other causes still listed.
-    assert "no Grok subscription" in summary
-    assert "tier doesn't include this model" in summary
-    assert "quota is exhausted" in summary
-    # The hint must point at the usage page where the user can verify.
-    assert "https://grok.com/?_s=usage" in summary
-    # Switching providers is still a valid escape hatch.
-    assert "/model" in summary
+    # No stale claim that X Premium is incompatible with Hermes.
+    assert "X Premium+ does NOT include" not in summary
+    assert "standalone SuperGrok subscribers" not in summary
 
 
-def test_summarize_api_error_does_not_accuse_subscribers():
-    """Hint must not confidently say the user has no subscription.
-
-    Don Piedro reported his subscription is active. The hint must not
-    contradict him — leading with the X Premium+ gotcha gives subscribers
-    a plausible reason ("oh, I'm on Premium+ not pure SuperGrok") instead
-    of accusing them of lying about having a subscription.
-    """
-    from run_agent import AIAgent
-
-    error = RuntimeError(
-        "HTTP 403: do not have an active Grok subscription"
-    )
-    summary = AIAgent._summarize_api_error(error)
-    # MUST NOT contain language that flatly assumes the user is unsubscribed.
-    assert "lacks SuperGrok" not in summary
-    assert "you are not subscribed" not in summary.lower()
-    # MUST lead with the most-likely-but-non-accusatory cause.
-    assert "X Premium+ does NOT include" in summary
-
-
-def test_summarize_api_error_decorates_xai_body_message():
-    """SDK-style error with structured body must also get the hint."""
+def test_summarize_api_error_xai_body_message_unwrapped():
+    """SDK-style error with structured body surfaces the message cleanly."""
     from run_agent import AIAgent
 
     class _XaiErr(Exception):
@@ -235,19 +208,9 @@ def test_summarize_api_error_decorates_xai_body_message():
 
     summary = AIAgent._summarize_api_error(_XaiErr("403"))
     assert "HTTP 403" in summary
-    assert "X Premium+ does NOT include" in summary
-
-
-def test_summarize_api_error_idempotent_for_entitlement_hint():
-    """Decorating twice must not double up the hint."""
-    from run_agent import AIAgent
-
-    raw = "HTTP 403: do not have an active Grok subscription"
-    once = AIAgent._decorate_xai_entitlement_error(raw)
-    twice = AIAgent._decorate_xai_entitlement_error(once)
-    assert once == twice
-    # Sanity: the hint did fire on the first pass.
-    assert "X Premium+ does NOT include" in once
+    assert "do not have an active Grok subscription" in summary
+    # No editorializing on top of xAI's own wording.
+    assert "X Premium+ does NOT include" not in summary
 
 
 def test_summarize_api_error_passes_through_unrelated_errors():

From e51d74ab917675a67e6a964d6c2c2ea2b150ac2c Mon Sep 17 00:00:00 2001
From: Maxim Esipov <maksesipov@gmail.com>
Date: Fri, 15 May 2026 17:19:31 +0300
Subject: [PATCH 145/218] fix(codex): rotate pool on usage limit 429

---
 run_agent.py                      | 12 ++++++--
 tests/run_agent/test_run_agent.py | 47 +++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 61699607d1f..ffe0ffbe67e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5135,7 +5135,7 @@ class AIAgent:
         if isinstance(body, dict):
             payload = body.get("error") if isinstance(body.get("error"), dict) else body
         if isinstance(payload, dict):
-            reason = payload.get("code") or payload.get("error")
+            reason = payload.get("code") or payload.get("type") or payload.get("error")
             if isinstance(reason, str) and reason.strip():
                 context["reason"] = reason.strip()
             message = payload.get("message") or payload.get("error_description")
@@ -7583,7 +7583,15 @@ class AIAgent:
             return False, has_retried_429
 
         if effective_reason == FailoverReason.rate_limit:
-            if not has_retried_429:
+            usage_limit_reached = False
+            if error_context:
+                context_reason = str(error_context.get("reason") or "").lower()
+                context_message = str(error_context.get("message") or "").lower()
+                usage_limit_reached = (
+                    "usage_limit_reached" in context_reason
+                    or "usage limit has been reached" in context_message
+                )
+            if not has_retried_429 and not usage_limit_reached:
                 return False, True
             rotate_status = status_code if status_code is not None else 429
             next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context)
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index cd62cd41ded..8d56ff6425a 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -3746,6 +3746,37 @@ class TestCredentialPoolRecovery:
         assert retry_same is False
         agent._swap_credential.assert_called_once_with(next_entry)
 
+    def test_recover_with_pool_rotates_usage_limit_429_immediately(self, agent):
+        next_entry = SimpleNamespace(label="secondary")
+        captured = {}
+
+        class _Pool:
+            def current(self):
+                return SimpleNamespace(label="primary")
+
+            def mark_exhausted_and_rotate(self, *, status_code, error_context=None):
+                captured["status_code"] = status_code
+                captured["error_context"] = error_context
+                return next_entry
+
+        agent._credential_pool = _Pool()
+        agent._swap_credential = MagicMock()
+
+        recovered, retry_same = agent._recover_with_credential_pool(
+            status_code=429,
+            has_retried_429=False,
+            error_context={
+                "reason": "usage_limit_reached",
+                "message": "The usage limit has been reached",
+            },
+        )
+
+        assert recovered is True
+        assert retry_same is False
+        assert captured["status_code"] == 429
+        assert captured["error_context"]["reason"] == "usage_limit_reached"
+        agent._swap_credential.assert_called_once_with(next_entry)
+
 
     def test_recover_with_pool_refreshes_on_401(self, agent):
         """401 with successful refresh should swap to refreshed credential."""
@@ -3832,6 +3863,22 @@ class TestCredentialPoolRecovery:
         assert context["message"] == "Weekly credits exhausted."
         assert context["reset_at"] == "2026-04-12T10:30:00Z"
 
+    def test_extract_api_error_context_uses_type_as_reason(self, agent):
+        error = SimpleNamespace(
+            body={
+                "error": {
+                    "type": "usage_limit_reached",
+                    "message": "The usage limit has been reached",
+                }
+            },
+            response=SimpleNamespace(headers={}),
+        )
+
+        context = agent._extract_api_error_context(error)
+
+        assert context["reason"] == "usage_limit_reached"
+        assert context["message"] == "The usage limit has been reached"
+
     def test_recover_with_pool_passes_error_context_on_rotated_429(self, agent):
         next_entry = SimpleNamespace(label="secondary")
         captured = {}

From 6f817e1447499cf51d8c966b3f3a600ba3412f85 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sat, 16 May 2026 16:23:35 -0600
Subject: [PATCH 146/218] fix(telegram): restore DM topic typing indicator

---
 gateway/platforms/telegram.py                  |  8 --------
 tests/gateway/test_telegram_thread_fallback.py | 17 +++++++++--------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 4c56937e5cb..50813c25dc6 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -3504,14 +3504,6 @@ class TelegramAdapter(BasePlatformAdapter):
         if self._bot:
             try:
                 _typing_thread = self._metadata_thread_id(metadata)
-                # Skip the Bot API call entirely for Hermes-created DM topic
-                # lanes: send_chat_action only accepts message_thread_id, which
-                # Telegram's Bot API 10.0 rejects for these lanes. The send
-                # path uses the reply-anchor fallback instead, but typing has
-                # no equivalent — skipping avoids noisy "thread not found"
-                # debug logs on every typing tick.
-                if metadata and metadata.get("telegram_dm_topic_reply_fallback"):
-                    return
                 message_thread_id = self._message_thread_id_for_typing(_typing_thread)
                 # No retry-without-thread fallback here: _message_thread_id_for_typing
                 # already maps the forum General topic to None, so any non-None value
diff --git a/tests/gateway/test_telegram_thread_fallback.py b/tests/gateway/test_telegram_thread_fallback.py
index e31753cc2b7..f310d017946 100644
--- a/tests/gateway/test_telegram_thread_fallback.py
+++ b/tests/gateway/test_telegram_thread_fallback.py
@@ -236,14 +236,13 @@ async def test_send_typing_does_not_fall_back_to_root_for_dm_topic():
 
 
 @pytest.mark.asyncio
-async def test_send_typing_skips_api_call_for_dm_topic_reply_fallback():
-    """Hermes-created DM topic lanes have no working Bot API typing route.
+async def test_send_typing_attempts_api_call_for_dm_topic_reply_fallback():
+    """Hermes-created DM topic lanes should still attempt scoped typing.
 
-    ``send_chat_action`` only accepts ``message_thread_id``, which Telegram's
-    Bot API 10.0 rejects for these lanes — the call would silently fail and
-    log a "thread not found" warning every typing tick (every 2s). Skipping
-    the call entirely keeps logs clean while preserving the user-visible
-    behavior (no typing indicator either way for these lanes).
+    Some private DM topic lanes route message sends through reply-anchor
+    fallback, but live Telegram testing shows sendChatAction accepts the lane's
+    message_thread_id. If Telegram rejects a stale or invalid thread later,
+    send_typing already swallows that failure as non-fatal.
     """
     adapter = _make_adapter()
     call_log = []
@@ -262,7 +261,9 @@ async def test_send_typing_skips_api_call_for_dm_topic_reply_fallback():
         },
     )
 
-    assert call_log == []
+    assert call_log == [
+        {"chat_id": 12345, "action": "typing", "message_thread_id": 20197},
+    ]
 
 
 @pytest.mark.asyncio

From 226cee43d97997525e4e26a20075aec98e641418 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 16:51:29 -0700
Subject: [PATCH 147/218] =?UTF-8?q?feat(cli):=20show=20=E2=96=B6=20N=20ind?=
 =?UTF-8?q?icator=20in=20status=20bar=20when=20/background=20tasks=20are?=
 =?UTF-8?q?=20running=20(#27175)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Surface live background-task count in the prompt_toolkit status bar so users
can see at a glance that a /background task exists and is running — no need
to ask the agent about it (the agent has no visibility into bg sessions by
design).

- _get_status_bar_snapshot now reports active_background_tasks from len()
  of the live _background_tasks dict (entries are removed in the task
  thread's finally block, so this reflects truly-running tasks)
- Indicator shown only on medium (<76) and wide (>=76) tiers; narrow (<52)
  stays minimal since it's already cramped
- No invalidate plumbing needed: status bar fragments are pulled via lambda
  on every redraw, and the bg thread already calls _app.invalidate() on exit

Refs #8568
---
 cli.py                                        |  25 +++++
 .../test_cli_background_status_indicator.py   | 104 ++++++++++++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 tests/cli/test_cli_background_status_indicator.py

diff --git a/cli.py b/cli.py
index 12f9ee98fb8..bc97c8e84c4 100644
--- a/cli.py
+++ b/cli.py
@@ -3113,8 +3113,19 @@ class HermesCLI:
             "session_total_tokens": 0,
             "session_api_calls": 0,
             "compressions": 0,
+            "active_background_tasks": 0,
         }
 
+        # Count live /background tasks. The dict entry is removed in the
+        # task thread's finally block, so len() reflects truly-running tasks.
+        # len() on a CPython dict is atomic; safe to read without a lock.
+        try:
+            bg_tasks = getattr(self, "_background_tasks", None)
+            if bg_tasks:
+                snapshot["active_background_tasks"] = len(bg_tasks)
+        except Exception:
+            pass
+
         if not agent:
             return snapshot
 
@@ -3350,6 +3361,9 @@ class HermesCLI:
                 compressions = snapshot.get("compressions", 0)
                 if compressions:
                     parts.append(f"🗜️ {compressions}")
+                bg_count = snapshot.get("active_background_tasks", 0)
+                if bg_count:
+                    parts.append(f"▶ {bg_count}")
                 parts.append(duration_label)
                 if yolo_active:
                     parts.append("⚠ YOLO")
@@ -3366,6 +3380,9 @@ class HermesCLI:
             parts = [f"⚕ {snapshot['model_short']}", context_label, percent_label]
             if compressions:
                 parts.append(f"🗜️ {compressions}")
+            bg_count = snapshot.get("active_background_tasks", 0)
+            if bg_count:
+                parts.append(f"▶ {bg_count}")
             parts.append(duration_label)
             prompt_elapsed = snapshot.get("prompt_elapsed")
             if prompt_elapsed:
@@ -3406,6 +3423,7 @@ class HermesCLI:
                 percent_label = f"{percent}%" if percent is not None else "--"
                 if width < 76:
                     compressions = snapshot.get("compressions", 0)
+                    bg_count = snapshot.get("active_background_tasks", 0)
                     frags = [
                         ("class:status-bar", " ⚕ "),
                         ("class:status-bar-strong", snapshot["model_short"]),
@@ -3415,6 +3433,9 @@ class HermesCLI:
                     if compressions:
                         frags.append(("class:status-bar-dim", " · "))
                         frags.append((self._compression_count_style(compressions), f"🗜️ {compressions}"))
+                    if bg_count:
+                        frags.append(("class:status-bar-dim", " · "))
+                        frags.append(("class:status-bar-strong", f"▶ {bg_count}"))
                     frags.extend([
                         ("class:status-bar-dim", " · "),
                         ("class:status-bar-dim", duration_label),
@@ -3433,6 +3454,7 @@ class HermesCLI:
 
                     bar_style = self._status_bar_context_style(percent)
                     compressions = snapshot.get("compressions", 0)
+                    bg_count = snapshot.get("active_background_tasks", 0)
                     frags = [
                         ("class:status-bar", " ⚕ "),
                         ("class:status-bar-strong", snapshot["model_short"]),
@@ -3446,6 +3468,9 @@ class HermesCLI:
                     if compressions:
                         frags.append(("class:status-bar-dim", " │ "))
                         frags.append((self._compression_count_style(compressions), f"🗜️ {compressions}"))
+                    if bg_count:
+                        frags.append(("class:status-bar-dim", " │ "))
+                        frags.append(("class:status-bar-strong", f"▶ {bg_count}"))
                     frags.extend([
                         ("class:status-bar-dim", " │ "),
                         ("class:status-bar-dim", duration_label),
diff --git a/tests/cli/test_cli_background_status_indicator.py b/tests/cli/test_cli_background_status_indicator.py
new file mode 100644
index 00000000000..32f39f96650
--- /dev/null
+++ b/tests/cli/test_cli_background_status_indicator.py
@@ -0,0 +1,104 @@
+"""Tests for the /background indicator in the CLI status bar.
+
+The classic prompt_toolkit status bar shows `▶ N` when N tasks launched via
+`/background` are still running. Source of truth is `self._background_tasks`
+(a Dict[str, threading.Thread]); entries are removed in the task thread's
+finally block, so len() reflects truly-running tasks.
+"""
+
+import threading
+from datetime import datetime
+
+from cli import HermesCLI
+
+
+def _stub_thread() -> threading.Thread:
+    """Return a Thread instance that's never started — pure dict-value stand-in."""
+    return threading.Thread(target=lambda: None)
+
+
+def _make_cli():
+    """Bare-metal HermesCLI for snapshot/build tests (no __init__ side effects)."""
+    cli_obj = HermesCLI.__new__(HermesCLI)
+    cli_obj.model = "anthropic/claude-opus-4.6"
+    cli_obj.agent = None
+    cli_obj._background_tasks = {}
+    # The snapshot reads session_start to compute duration; supply a stub.
+    cli_obj.session_start = datetime.now()
+    return cli_obj
+
+
+def test_snapshot_reports_zero_when_no_background_tasks():
+    cli_obj = _make_cli()
+    snap = cli_obj._get_status_bar_snapshot()
+    assert snap["active_background_tasks"] == 0
+
+
+def test_snapshot_counts_live_background_tasks():
+    cli_obj = _make_cli()
+    cli_obj._background_tasks = {"bg_a": _stub_thread(), "bg_b": _stub_thread()}
+    snap = cli_obj._get_status_bar_snapshot()
+    assert snap["active_background_tasks"] == 2
+
+
+def test_snapshot_safe_when_background_tasks_attr_missing():
+    """Older HermesCLI instances (tests with __new__, etc.) may lack the attr."""
+    cli_obj = HermesCLI.__new__(HermesCLI)
+    cli_obj.model = "x"
+    cli_obj.agent = None
+    cli_obj.session_start = datetime.now()
+    # No _background_tasks at all — must not raise.
+    snap = cli_obj._get_status_bar_snapshot()
+    assert snap["active_background_tasks"] == 0
+
+
+def test_plain_text_status_omits_indicator_when_idle():
+    cli_obj = _make_cli()
+    text = cli_obj._build_status_bar_text(width=80)
+    assert "▶" not in text
+
+
+def test_plain_text_status_shows_indicator_when_active():
+    cli_obj = _make_cli()
+    cli_obj._background_tasks = {"bg_a": _stub_thread()}
+    text = cli_obj._build_status_bar_text(width=80)
+    assert "▶ 1" in text
+
+
+def test_plain_text_status_shows_higher_count():
+    cli_obj = _make_cli()
+    cli_obj._background_tasks = {
+        "a": _stub_thread(),
+        "b": _stub_thread(),
+        "c": _stub_thread(),
+    }
+    text = cli_obj._build_status_bar_text(width=80)
+    assert "▶ 3" in text
+
+
+def test_narrow_width_omits_bg_indicator():
+    """The narrow tier (<52) is already cramped — bg is secondary, drop it."""
+    cli_obj = _make_cli()
+    cli_obj._background_tasks = {"bg_a": _stub_thread()}
+    text = cli_obj._build_status_bar_text(width=40)
+    assert "▶" not in text
+
+
+def test_fragments_include_bg_segment_when_active():
+    cli_obj = _make_cli()
+    cli_obj._background_tasks = {"a": _stub_thread(), "b": _stub_thread()}
+    cli_obj._status_bar_visible = True
+    # _get_status_bar_fragments asks _get_tui_terminal_width(); stub it wide.
+    cli_obj._get_tui_terminal_width = lambda: 120  # type: ignore[method-assign]
+    frags = cli_obj._get_status_bar_fragments()
+    rendered = "".join(text for _style, text in frags)
+    assert "▶ 2" in rendered
+
+
+def test_fragments_omit_bg_segment_when_idle():
+    cli_obj = _make_cli()
+    cli_obj._status_bar_visible = True
+    cli_obj._get_tui_terminal_width = lambda: 120  # type: ignore[method-assign]
+    frags = cli_obj._get_status_bar_fragments()
+    rendered = "".join(text for _style, text in frags)
+    assert "▶" not in rendered

From e21cb8d1457f603cda1dc8413efc400721d256e7 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 16:51:42 -0700
Subject: [PATCH 148/218] feat(status): append session recap to /status output
 (#27176)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a pure-local recap of recent session activity — turn counts,
tools used, files touched, last user ask, last assistant reply —
appended to the existing /status output. Useful when juggling multiple
sessions and you want a one-glance reminder of where this one left off.

Inspired by Claude Code 2.1.114's /recap, but folded into /status so
we don't add a 6th info command. Pure local computation: no LLM call,
no auxiliary model, no prompt-cache invalidation, instant and free.

Salvage of #18587 — kept the shared hermes_cli.session_recap.build_recap
helper and its 13 unit tests, dropped the /recap slash command +
ACTIVE_SESSION_BYPASS_COMMANDS entry + Level-2 bypass since /status
already covers both surfaces.

Tailored to hermes-agent's tool vocabulary: file-editing tools
(patch, write_file, read_file, skill_manage, skill_view) surface
touched paths; tool-call counts highlight which classes of work
drove the session.

Source: https://code.claude.com/docs/en/whats-new/2026-w17
---
 cli.py                                 |  18 ++
 gateway/run.py                         |  18 ++
 hermes_cli/session_recap.py            | 316 +++++++++++++++++++++++++
 tests/hermes_cli/test_session_recap.py | 180 ++++++++++++++
 4 files changed, 532 insertions(+)
 create mode 100644 hermes_cli/session_recap.py
 create mode 100644 tests/hermes_cli/test_session_recap.py

diff --git a/cli.py b/cli.py
index bc97c8e84c4..c1ba1c0ddd2 100644
--- a/cli.py
+++ b/cli.py
@@ -5469,6 +5469,24 @@ class HermesCLI:
             f"Tokens: {total_tokens:,}",
             f"Agent Running: {'Yes' if is_running else 'No'}",
         ])
+
+        # Session recap — pure local compute summary of recent activity
+        # (turn counts, tools used, files touched, last ask, last reply).
+        # No LLM call, no prompt-cache impact. Inspired by Claude Code
+        # 2.1.114's /recap.
+        try:
+            from hermes_cli.session_recap import build_recap
+            recap = build_recap(
+                self.conversation_history or [],
+                session_title=title or None,
+                session_id=self.session_id,
+                platform="cli",
+            )
+            if recap:
+                lines.extend(["", recap])
+        except Exception as exc:  # defensive — don't let /status fail
+            logger.debug("build_recap failed in /status: %s", exc)
+
         self._console_print("\n".join(lines), highlight=False, markup=False)
     
     def _fast_command_available(self) -> bool:
diff --git a/gateway/run.py b/gateway/run.py
index a5eaafcb063..458603c3115 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -8663,6 +8663,24 @@ class GatewayRunner:
             t("gateway.status.platforms", platforms=', '.join(connected_platforms)),
         ])
 
+        # Session recap — what was this session ABOUT? Pure local compute,
+        # no LLM call, no prompt-cache impact. Useful when juggling multiple
+        # gateway sessions and you want a one-glance reminder of where this
+        # one left off. Inspired by Claude Code 2.1.114's /recap.
+        try:
+            from hermes_cli.session_recap import build_recap
+            history = self.session_store.load_transcript(session_entry.session_id)
+            recap = build_recap(
+                history,
+                session_title=title,
+                session_id=session_entry.session_id,
+                platform=source.platform.value if source else None,
+            )
+            if recap:
+                lines.extend(["", recap])
+        except Exception as exc:  # pragma: no cover — defensive
+            logger.debug("build_recap failed in /status: %s", exc)
+
         return "\n".join(lines)
 
     async def _handle_agents_command(self, event: MessageEvent) -> str:
diff --git a/hermes_cli/session_recap.py b/hermes_cli/session_recap.py
new file mode 100644
index 00000000000..d67f737d799
--- /dev/null
+++ b/hermes_cli/session_recap.py
@@ -0,0 +1,316 @@
+"""Session recap — summarize what's happened in the current session.
+
+Inspired by Claude Code's `/recap` command (v2.1.114, April 2026), which
+shows a one-line summary of what happened while a terminal was unfocused
+so users juggling multiple sessions can re-orient quickly.
+
+Source: https://code.claude.com/docs/en/whats-new/2026-w17
+
+Differences from Claude Code:
+    - Pure local computation from the in-memory conversation history. No
+      LLM call, no auxiliary model, no prompt-cache invalidation. A
+      recap should be instant and free.
+    - Works unchanged on CLI and every gateway platform (Telegram,
+      Discord, Slack, …) because both call into the same ``build_recap``
+      helper. Claude Code only shows this on the CLI.
+    - Tailored to hermes-agent's tool vocabulary (``terminal``, ``patch``,
+      ``write_file``, ``delegate_task``, ``browser_*``, ``web_*``) — the
+      recap surfaces which classes of work were most active.
+"""
+from __future__ import annotations
+
+import os
+from collections import Counter
+from typing import Any, Iterable, List, Mapping, Optional, Sequence, Tuple
+
+# How many recent user/assistant turns we consider "recent activity".
+_RECENT_TURN_WINDOW = 20
+
+# How many characters of the latest user prompt to show.
+_PROMPT_PREVIEW_CHARS = 140
+
+# How many characters of the latest assistant text to show.
+_ASSISTANT_PREVIEW_CHARS = 200
+
+# How many recently-touched files to list.
+_MAX_FILES_LISTED = 5
+
+# Tool names that identify a file-editing action and the argument key that
+# holds the path.
+_FILE_EDIT_TOOLS: Mapping[str, str] = {
+    "write_file": "path",
+    "patch": "path",
+    "read_file": "path",
+    "skill_manage": "file_path",
+    "skill_view": "file_path",
+}
+
+
+def _coerce_text(value: Any) -> str:
+    """Flatten assistant/user ``content`` into a plain string.
+
+    Content can be a string or a list of content blocks (for multimodal
+    or reasoning models). We concatenate every text-like block and
+    ignore the rest.
+    """
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value
+    if isinstance(value, list):
+        parts: List[str] = []
+        for block in value:
+            if isinstance(block, str):
+                parts.append(block)
+                continue
+            if isinstance(block, Mapping):
+                text = block.get("text")
+                if isinstance(text, str) and text:
+                    parts.append(text)
+        return "\n".join(parts)
+    return str(value)
+
+
+def _tool_call_name_and_args(tool_call: Any) -> Tuple[str, Mapping[str, Any]]:
+    """Extract ``(name, arguments_dict)`` from a tool_call entry.
+
+    ``arguments`` may be a JSON string or a dict depending on provider.
+    Return an empty dict if it cannot be parsed.
+    """
+    if not isinstance(tool_call, Mapping):
+        return "", {}
+    fn = tool_call.get("function") or {}
+    if not isinstance(fn, Mapping):
+        return "", {}
+    name = str(fn.get("name") or "") or ""
+    raw_args = fn.get("arguments")
+    if isinstance(raw_args, Mapping):
+        return name, raw_args
+    if isinstance(raw_args, str) and raw_args:
+        try:
+            import json
+
+            parsed = json.loads(raw_args)
+            if isinstance(parsed, Mapping):
+                return name, parsed
+        except Exception:
+            return name, {}
+    return name, {}
+
+
+def _iter_assistant_tool_calls(
+    messages: Sequence[Mapping[str, Any]],
+) -> Iterable[Tuple[str, Mapping[str, Any]]]:
+    for msg in messages:
+        if not isinstance(msg, Mapping):
+            continue
+        if msg.get("role") != "assistant":
+            continue
+        tool_calls = msg.get("tool_calls") or []
+        if not isinstance(tool_calls, list):
+            continue
+        for tc in tool_calls:
+            name, args = _tool_call_name_and_args(tc)
+            if name:
+                yield name, args
+
+
+def _count_visible_turns(
+    messages: Sequence[Mapping[str, Any]],
+) -> Tuple[int, int, int]:
+    """Return ``(user_turn_count, assistant_turn_count, tool_message_count)``."""
+    users = assistants = tools = 0
+    for msg in messages:
+        if not isinstance(msg, Mapping):
+            continue
+        role = msg.get("role")
+        if role == "user":
+            users += 1
+        elif role == "assistant":
+            assistants += 1
+        elif role == "tool":
+            tools += 1
+    return users, assistants, tools
+
+
+def _latest_user_prompt(
+    messages: Sequence[Mapping[str, Any]],
+) -> Optional[str]:
+    for msg in reversed(messages):
+        if isinstance(msg, Mapping) and msg.get("role") == "user":
+            text = _coerce_text(msg.get("content")).strip()
+            if text:
+                return text
+    return None
+
+
+def _latest_assistant_text(
+    messages: Sequence[Mapping[str, Any]],
+) -> Optional[str]:
+    for msg in reversed(messages):
+        if not isinstance(msg, Mapping):
+            continue
+        if msg.get("role") != "assistant":
+            continue
+        text = _coerce_text(msg.get("content")).strip()
+        if text:
+            return text
+    return None
+
+
+def _recent_window(
+    messages: Sequence[Mapping[str, Any]], window: int = _RECENT_TURN_WINDOW
+) -> List[Mapping[str, Any]]:
+    """Return the tail slice of ``messages`` covering at most ``window``
+    user+assistant turns (tool messages ride along inside the window).
+
+    Iterating from the end, we count user and assistant messages and
+    keep everything from the first message that falls within the window.
+    """
+    count = 0
+    cut = 0
+    for i in range(len(messages) - 1, -1, -1):
+        msg = messages[i]
+        if isinstance(msg, Mapping) and msg.get("role") in ("user", "assistant"):
+            count += 1
+            if count >= window:
+                cut = i
+                break
+    else:
+        return list(messages)
+    return list(messages[cut:])
+
+
+def _shortened_path(path: str) -> str:
+    """Show a path relative to cwd when possible, otherwise with ~ expansion."""
+    if not path:
+        return path
+    try:
+        abs_path = os.path.abspath(os.path.expanduser(path))
+        cwd = os.getcwd()
+        if abs_path == cwd:
+            return "."
+        if abs_path.startswith(cwd + os.sep):
+            return abs_path[len(cwd) + 1 :]
+        home = os.path.expanduser("~")
+        if abs_path.startswith(home + os.sep):
+            return "~/" + abs_path[len(home) + 1 :]
+        return abs_path
+    except Exception:
+        return path
+
+
+def _summarise_tool_activity(
+    tool_calls: Sequence[Tuple[str, Mapping[str, Any]]],
+) -> Tuple[List[Tuple[str, int]], List[str]]:
+    """Return ``(tool_counts_sorted, recently_edited_files)``.
+
+    ``tool_counts_sorted`` is descending by count, keeping the full list
+    so callers can truncate for display. ``recently_edited_files`` lists
+    distinct paths (most recent first) from file-editing tools.
+    """
+    counter: Counter[str] = Counter()
+    files_seen: List[str] = []
+    files_set: set[str] = set()
+    # Walk in reverse so "most recent first" drops out of order-preserved iteration.
+    for name, args in reversed(list(tool_calls)):
+        counter[name] += 1
+        arg_key = _FILE_EDIT_TOOLS.get(name)
+        if arg_key:
+            path = args.get(arg_key)
+            if isinstance(path, str) and path and path not in files_set:
+                files_set.add(path)
+                files_seen.append(_shortened_path(path))
+    # Restore "reverse of reverse" for correct counts; Counter ignores order
+    # so only files_seen needed the reversal. Fix ordering: currently
+    # files_seen is newest→oldest which is what we want for display.
+    tool_counts = sorted(counter.items(), key=lambda kv: (-kv[1], kv[0]))
+    return tool_counts, files_seen
+
+
+def _truncate(text: str, limit: int) -> str:
+    text = " ".join(text.split())  # collapse newlines for a compact one-liner
+    if len(text) <= limit:
+        return text
+    return text[: limit - 1].rstrip() + "…"
+
+
+def build_recap(
+    messages: Sequence[Mapping[str, Any]],
+    *,
+    session_title: Optional[str] = None,
+    session_id: Optional[str] = None,
+    platform: Optional[str] = None,
+) -> str:
+    """Build a multi-line recap of recent activity.
+
+    Inputs:
+        messages: the full conversation history as a list of
+            chat-completion-style dicts (``role``, ``content``,
+            ``tool_calls``, …).
+        session_title: optional human title (from SessionDB).
+        session_id: optional session id.
+        platform: optional hint (``"cli"``, ``"telegram"``, …). Does not
+            change behavior today but is accepted for forward compat.
+
+    The output is plain text designed to render well in both a terminal
+    (with 80-col wrapping) and a gateway message bubble.
+    """
+    _ = platform  # reserved for future use
+    lines: List[str] = []
+
+    header_bits: List[str] = ["Session recap"]
+    if session_title:
+        header_bits.append(f"— {session_title}")
+    elif session_id:
+        header_bits.append(f"— {session_id[:8]}")
+    lines.append(" ".join(header_bits))
+
+    if not messages:
+        lines.append("  (nothing to recap — no messages yet)")
+        return "\n".join(lines)
+
+    users, assistants, tool_msgs = _count_visible_turns(messages)
+    window = _recent_window(messages)
+    win_users, win_assistants, _ = _count_visible_turns(window)
+
+    scope = (
+        f"{win_users} user turn{'s' if win_users != 1 else ''} / "
+        f"{win_assistants} assistant repl{'ies' if win_assistants != 1 else 'y'}"
+    )
+    if (users, assistants) != (win_users, win_assistants):
+        scope += f" (of {users}/{assistants} total)"
+    lines.append(f"  Recent: {scope}, {tool_msgs} tool result{'s' if tool_msgs != 1 else ''}")
+
+    tool_calls = list(_iter_assistant_tool_calls(window))
+    tool_counts, files = _summarise_tool_activity(tool_calls)
+    if tool_counts:
+        top = ", ".join(f"{name}×{count}" for name, count in tool_counts[:5])
+        extra = len(tool_counts) - 5
+        if extra > 0:
+            top += f" (+{extra} more)"
+        lines.append(f"  Tools used: {top}")
+    if files:
+        shown = files[:_MAX_FILES_LISTED]
+        extra = len(files) - len(shown)
+        entry = ", ".join(shown)
+        if extra > 0:
+            entry += f" (+{extra} more)"
+        lines.append(f"  Files touched: {entry}")
+
+    latest_user = _latest_user_prompt(window)
+    if latest_user:
+        lines.append(f"  Last ask: {_truncate(latest_user, _PROMPT_PREVIEW_CHARS)}")
+
+    latest_reply = _latest_assistant_text(window)
+    if latest_reply:
+        lines.append(f"  Last reply: {_truncate(latest_reply, _ASSISTANT_PREVIEW_CHARS)}")
+
+    if len(lines) == 2:
+        # Only the header + scope line — nothing substantive to show.
+        lines.append("  (no assistant activity yet in this window)")
+
+    return "\n".join(lines)
+
+
+__all__ = ["build_recap"]
diff --git a/tests/hermes_cli/test_session_recap.py b/tests/hermes_cli/test_session_recap.py
new file mode 100644
index 00000000000..3998c06c61a
--- /dev/null
+++ b/tests/hermes_cli/test_session_recap.py
@@ -0,0 +1,180 @@
+"""Unit tests for hermes_cli.session_recap."""
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from hermes_cli.session_recap import build_recap
+
+
+def _user(text):
+    return {"role": "user", "content": text}
+
+
+def _assistant(text=None, tool_calls=None):
+    msg = {"role": "assistant", "content": text}
+    if tool_calls:
+        msg["tool_calls"] = tool_calls
+    return msg
+
+
+def _tool_call(name, args):
+    return {
+        "id": f"call_{name}",
+        "type": "function",
+        "function": {"name": name, "arguments": json.dumps(args)},
+    }
+
+
+def _tool_result(content="ok"):
+    return {"role": "tool", "content": content}
+
+
+def test_empty_history():
+    out = build_recap([])
+    assert "Session recap" in out
+    assert "nothing to recap" in out
+
+
+def test_header_shows_title_when_provided():
+    out = build_recap([_user("hello")], session_title="Refactor the adapter")
+    assert "Refactor the adapter" in out.splitlines()[0]
+
+
+def test_header_shows_short_id_when_no_title():
+    out = build_recap([_user("hello")], session_id="abcdef1234567890")
+    assert "abcdef12" in out.splitlines()[0]
+
+
+def test_counts_recent_turns():
+    msgs = [
+        _user("one"),
+        _assistant("first reply"),
+        _user("two"),
+        _assistant("second reply"),
+    ]
+    out = build_recap(msgs)
+    assert "2 user turn" in out
+    assert "assistant repl" in out
+
+
+def test_last_ask_and_reply_are_surfaced():
+    msgs = [
+        _user("old question"),
+        _assistant("old answer"),
+        _user("summarise the docs"),
+        _assistant("here is the summary of the docs you asked for"),
+    ]
+    out = build_recap(msgs)
+    assert "summarise the docs" in out
+    assert "summary of the docs" in out
+
+
+def test_tool_counts_and_files():
+    msgs = [
+        _user("edit the readme and run tests"),
+        _assistant(
+            tool_calls=[
+                _tool_call("read_file", {"path": "README.md"}),
+                _tool_call("patch", {"path": "README.md"}),
+            ]
+        ),
+        _tool_result(),
+        _tool_result(),
+        _assistant(
+            tool_calls=[
+                _tool_call("terminal", {"command": "pytest"}),
+            ]
+        ),
+        _tool_result("tests ok"),
+        _assistant("All green."),
+    ]
+    out = build_recap(msgs)
+    assert "patch×1" in out
+    assert "terminal×1" in out
+    assert "read_file×1" in out
+    # README.md should appear (may include cwd-relative prefix stripping).
+    assert "README.md" in out
+
+
+def test_tool_preview_length_truncates_long_user_prompt():
+    long = "x " * 500
+    out = build_recap([_user(long)])
+    ask_line = [l for l in out.splitlines() if "Last ask" in l][0]
+    assert len(ask_line) < 300  # truncated with ellipsis
+    assert "…" in ask_line
+
+
+def test_respects_recent_window():
+    # 30 turns of user+assistant; only the most recent 20 should be summarised.
+    msgs = []
+    for i in range(30):
+        msgs.append(_user(f"question {i}"))
+        msgs.append(_assistant(f"answer {i}"))
+    out = build_recap(msgs)
+    # We scoped to the 20-turn window but show "of 30/30 total".
+    assert "of 30/30 total" in out
+
+
+def test_multimodal_content_blocks_flattened():
+    msgs = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "check this file"},
+                {"type": "image_url", "image_url": {"url": "..."}},
+            ],
+        },
+        _assistant("Looked at your image."),
+    ]
+    out = build_recap(msgs)
+    assert "check this file" in out
+    assert "Looked at your image" in out
+
+
+def test_handles_arguments_as_dict_not_string():
+    # Some providers return arguments already as a dict.
+    msgs = [
+        _user("go"),
+        {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "patch",
+                        "arguments": {"path": "foo.py"},
+                    },
+                }
+            ],
+        },
+    ]
+    out = build_recap(msgs)
+    assert "patch×1" in out
+    assert "foo.py" in out
+
+
+def test_no_assistant_activity_hint():
+    out = build_recap([_user("just sent my first message")])
+    assert "no assistant activity" in out or "Last ask" in out
+
+
+def test_tool_message_count_reported():
+    msgs = [
+        _user("go"),
+        _assistant(tool_calls=[_tool_call("read_file", {"path": "a"})]),
+        _tool_result(),
+        _tool_result(),
+        _assistant("done"),
+    ]
+    out = build_recap(msgs)
+    assert "2 tool result" in out
+
+
+def test_ignores_non_mapping_entries_gracefully():
+    msgs = [None, "stray", _user("hi"), _assistant("hello")]
+    # Should not raise.
+    out = build_recap(msgs)
+    assert "Session recap" in out

From 2b193907d668af0c45f108d885db53a7ce8b8919 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 17:09:41 -0700
Subject: [PATCH 149/218] fix(xai): surface provider 'error' SSE frame in Codex
 fallback stream (#27184)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xAI's Responses stream emits 'type=error' as the FIRST SSE frame when an
OAuth account is unsubscribed/exhausted or rejects the encrypted-reasoning
replay introduced in the May 2026 SuperGrok rollout. The SDK helper
raises RuntimeError(Expected to have received response.created before
error), which the caller correctly routes to
_run_codex_create_stream_fallback. The fallback then opens a new stream
that emits the same 'error' frame — but the fallback loop only handled
{response.completed, response.incomplete, response.failed} and silently
continue'd past 'error' events. Result: the loop fell off the end of
the stream and raised the useless 'fallback did not emit a terminal
response' RuntimeError, which the classifier marked retryable=True and
looped 3x before failing with no clue what went wrong.

Now: 'error' frames raise a synthesized _StreamErrorEvent with an OpenAI
SDK-shaped .body so _summarize_api_error, _extract_api_error_context,
_is_entitlement_failure, and classify_api_error all see the real
provider message. Users on unsubscribed accounts now see 'do not have
an active Grok subscription' once, not three RuntimeErrors.

Verified end-to-end: classifier returns reason=auth retryable=False;
entitlement detector matches even with status_code=None; summarizer
returns the full xAI message.

Tests: 4 new in TestCodexFallbackErrorEvent covering xAI subscription
message, dict-shaped events, summarizer integration, and the empty-stream
case (must still raise the original RuntimeError so 'truncated mid-flight'
stays distinguishable from 'provider rejected the call').
---
 run_agent.py                      |  67 ++++++++++++++
 tests/run_agent/test_streaming.py | 142 ++++++++++++++++++++++++++++++
 2 files changed, 209 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index ffe0ffbe67e..3f4573bac52 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1110,6 +1110,45 @@ def _qwen_portal_headers() -> dict:
     }
 
 
+class _StreamErrorEvent(Exception):
+    """Synthesized provider error surfaced from a Responses ``error`` SSE frame.
+
+    Some Codex-style Responses backends (xAI for subscription/quota
+    failures, custom relays under malformed-tool-call conditions) emit a
+    standalone ``type=error`` frame instead of routing the failure
+    through ``response.failed`` or returning an HTTP 4xx.  The fallback
+    streaming path raises this exception so ``_summarize_api_error`` and
+    ``_extract_api_error_context`` see a familiar ``.body`` /
+    ``.status_code`` shape and the entitlement detector can match the
+    underlying provider message ("do not have an active Grok
+    subscription", etc.).
+    """
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        code: Optional[str] = None,
+        param: Optional[str] = None,
+        status_code: Optional[int] = None,
+    ) -> None:
+        super().__init__(message)
+        self.message = message
+        self.code = code
+        self.param = param
+        self.status_code = status_code
+        # OpenAI SDK-shaped body so _extract_api_error_context /
+        # _summarize_api_error / classify_api_error all pick it up.
+        self.body: Dict[str, Any] = {
+            "error": {
+                "message": message,
+                "code": code,
+                "param": param,
+                "type": "error",
+            }
+        }
+
+
 class AIAgent:
     """
     AI Agent with tool calling capabilities.
@@ -7212,6 +7251,34 @@ class AIAgent:
                 if not event_type and isinstance(event, dict):
                     event_type = event.get("type")
 
+                # ``error`` SSE frames carry the provider's real failure
+                # reason (subscription / quota / model-not-available /
+                # rejected-reasoning-replay) but never appear in the
+                # ``{completed, incomplete, failed}`` terminal set, so the
+                # raw loop below would silently consume them and end with
+                # "did not emit a terminal response".  xAI in particular
+                # emits ``type=error`` as the FIRST frame for OAuth
+                # accounts whose Grok subscription is missing/exhausted —
+                # the SDK's stream helper raises ``RuntimeError(Expected
+                # to have received response.created before error)`` which
+                # the caller catches and routes here, expecting this
+                # fallback to surface the message.  Synthesize an
+                # APIError-shaped exception so ``_summarize_api_error``
+                # and the credential-pool entitlement detector see the
+                # real text instead of a generic RuntimeError.
+                if event_type == "error":
+                    err_message = getattr(event, "message", None)
+                    if not err_message and isinstance(event, dict):
+                        err_message = event.get("message")
+                    err_code = getattr(event, "code", None)
+                    if not err_code and isinstance(event, dict):
+                        err_code = event.get("code")
+                    err_param = getattr(event, "param", None)
+                    if not err_param and isinstance(event, dict):
+                        err_param = event.get("param")
+                    err_message = (err_message or "stream emitted error event").strip()
+                    raise _StreamErrorEvent(err_message, code=err_code, param=err_param)
+
                 # Collect output items and text deltas for backfill
                 if event_type == "response.output_item.done":
                     done_item = getattr(event, "item", None)
diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py
index 1ce140f82bf..474a568875d 100644
--- a/tests/run_agent/test_streaming.py
+++ b/tests/run_agent/test_streaming.py
@@ -1586,3 +1586,145 @@ class TestCopilotACPStreamingDecision:
             _use_streaming = False
 
         assert _use_streaming is True
+
+
+class TestCodexFallbackErrorEvent:
+    """Provider ``error`` SSE frames must surface the real message,
+    not the generic "did not emit a terminal response" RuntimeError.
+
+    xAI emits ``type=error`` as the FIRST frame on the Responses stream
+    when an OAuth account is unsubscribed/exhausted (May 2026
+    SuperGrok rollout).  The SDK helper raises
+    ``RuntimeError("Expected to have received response.created before
+    error")`` which the caller catches and routes to
+    ``_run_codex_create_stream_fallback``.  The fallback then opens a
+    NEW stream that emits the same ``type=error`` frame; before this
+    fix it ignored the event entirely and raised a useless RuntimeError.
+    """
+
+    def _make_agent(self):
+        from run_agent import AIAgent
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://api.x.ai/v1",
+            provider="xai-oauth",
+            model="grok-4.3",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "codex_responses"
+        agent._touch_activity = lambda desc: None
+        return agent
+
+    def test_fallback_raises_synthesized_error_with_xai_subscription_message(self):
+        from run_agent import _StreamErrorEvent
+
+        agent = self._make_agent()
+
+        error_event = SimpleNamespace(
+            type="error",
+            message=(
+                "Forbidden: The caller does not have permission to execute the specified operation. "
+                "'You have either run out of available resources or do not have an active Grok subscription.'"
+            ),
+            code="permission_denied",
+            param=None,
+            sequence_number=1,
+        )
+
+        class _FakeStream:
+            def __iter__(self_inner):
+                return iter([error_event])
+            def close(self_inner):
+                return None
+
+        mock_client = MagicMock()
+        mock_client.responses.create.return_value = _FakeStream()
+
+        with pytest.raises(_StreamErrorEvent) as excinfo:
+            agent._run_codex_create_stream_fallback(
+                {"model": "grok-4.3", "instructions": "hi", "input": []},
+                client=mock_client,
+            )
+
+        exc = excinfo.value
+        assert "active Grok subscription" in str(exc)
+        assert exc.code == "permission_denied"
+        assert isinstance(exc.body, dict)
+        assert exc.body["error"]["message"] == error_event.message
+        # _extract_api_error_context reads .body["error"]["message"] — make sure
+        # the entitlement detector will find the subscription phrase there.
+        assert "active Grok subscription" in exc.body["error"]["message"]
+
+    def test_fallback_dict_event_payload_is_also_handled(self):
+        """Some relays deliver events as plain dicts instead of model
+        objects; the dict branch in the loop must surface them too."""
+        from run_agent import _StreamErrorEvent
+
+        agent = self._make_agent()
+
+        error_event = {
+            "type": "error",
+            "message": "rate_limited",
+            "code": "rate_limit_exceeded",
+        }
+
+        class _FakeStream:
+            def __iter__(self_inner):
+                return iter([error_event])
+            def close(self_inner):
+                return None
+
+        mock_client = MagicMock()
+        mock_client.responses.create.return_value = _FakeStream()
+
+        with pytest.raises(_StreamErrorEvent) as excinfo:
+            agent._run_codex_create_stream_fallback(
+                {"model": "grok-4.3", "instructions": "hi", "input": []},
+                client=mock_client,
+            )
+
+        assert "rate_limited" in str(excinfo.value)
+        assert excinfo.value.code == "rate_limit_exceeded"
+
+    def test_fallback_surfaces_message_useful_to_summarizer(self):
+        """The synthesized exception must be readable by
+        ``_summarize_api_error`` so the user-facing log line shows the
+        real provider message instead of a generic class name."""
+        from run_agent import AIAgent, _StreamErrorEvent
+
+        agent = self._make_agent()
+        exc = _StreamErrorEvent(
+            "You have either run out of available resources or do not have an active Grok subscription.",
+            code="permission_denied",
+        )
+
+        summary = AIAgent._summarize_api_error(exc)
+        assert "active Grok subscription" in summary
+
+    def test_fallback_still_raises_terminal_error_when_no_error_event(self):
+        """Streams that simply end without any terminal event (and no
+        ``error`` frame) must continue to raise the original
+        ``"did not emit a terminal response"`` RuntimeError so callers
+        can distinguish "stream truncated mid-flight" from "provider
+        rejected the call"."""
+        agent = self._make_agent()
+
+        # Empty stream — no events at all
+        class _FakeStream:
+            def __iter__(self_inner):
+                return iter([])
+            def close(self_inner):
+                return None
+
+        mock_client = MagicMock()
+        mock_client.responses.create.return_value = _FakeStream()
+
+        with pytest.raises(RuntimeError) as excinfo:
+            agent._run_codex_create_stream_fallback(
+                {"model": "grok-4.3", "instructions": "hi", "input": []},
+                client=mock_client,
+            )
+
+        assert "did not emit a terminal response" in str(excinfo.value)

From 33528b428d196443f788f43fec3139bd6e2c4997 Mon Sep 17 00:00:00 2001
From: konsisumer <der@konsi.org>
Date: Wed, 6 May 2026 17:21:47 +0200
Subject: [PATCH 150/218] fix(agent): reset _fallback_index at turn start even
 when no fallback activated

In long-lived interactive sessions, _try_activate_fallback() advances
_fallback_index before attempting client resolution.  When resolution
fails (provider not configured, etc.) the function returns False without
ever setting _fallback_activated=True.  _restore_primary_runtime() then
skips its reset block entirely (guarded by `if not _fallback_activated`),
leaving _fallback_index >= len(_fallback_chain) for all subsequent turns.
The eager-fallback guard at the top of the retry loop checks
`_fallback_index < len(_fallback_chain)`, so the condition fails silently
and no fallback is ever attempted again for that session.

Cron jobs spawn a fresh AIAgent per run and never hit this path, which is
why the same fallback chain works reliably for cron but not interactive.

Fix: reset _fallback_index=0 in the `not _fallback_activated` early-return
branch so every new turn starts with the full chain available.

Fixes #20465
---
 run_agent.py                                  |  8 ++++++++
 .../run_agent/test_primary_runtime_restore.py | 20 +++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 3f4573bac52..2931c4fa349 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9223,6 +9223,14 @@ class AIAgent:
         ``gateway/run.py``), so this restoration IS needed there too.
         """
         if not self._fallback_activated:
+            # Reset the chain index even when no fallback was activated this
+            # turn.  Without this, a turn where _try_activate_fallback() was
+            # called but returned False (chain exhausted or provider not
+            # configured) leaves _fallback_index >= len(_fallback_chain) while
+            # _fallback_activated stays False.  The next turn skips this block
+            # entirely, stranding the index and silently blocking all future
+            # fallback attempts for the session.  Fixes #20465.
+            self._fallback_index = 0
             return False
 
         if getattr(self, "_rate_limited_until", 0) > time.monotonic():
diff --git a/tests/run_agent/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py
index d082f047f27..b921e61ab14 100644
--- a/tests/run_agent/test_primary_runtime_restore.py
+++ b/tests/run_agent/test_primary_runtime_restore.py
@@ -123,6 +123,26 @@ class TestRestorePrimaryRuntime:
         assert agent._fallback_activated is False
         assert agent._restore_primary_runtime() is False
 
+    def test_resets_index_when_fallback_not_activated(self):
+        """Regression for #20465: failed activation leaves _fallback_index advanced
+        with _fallback_activated=False; the next turn's restore must reset the index."""
+        fbs = [{"provider": "custom", "model": "gpt-oss:20b",
+                "base_url": "http://host.docker.internal:11434/v1", "api_key": "ollama"}]
+        agent = _make_agent(fallback_model=fbs)
+
+        # resolve_provider_client returns None → _try_activate_fallback returns False
+        # but _fallback_index has already been incremented to 1
+        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(None, None)):
+            assert agent._try_activate_fallback() is False
+
+        assert agent._fallback_activated is False
+        assert agent._fallback_index == 1  # advanced past the only entry
+
+        # _restore_primary_runtime must reset the index so the next turn can retry
+        result = agent._restore_primary_runtime()
+        assert result is False  # still no-op (primary was never left)
+        assert agent._fallback_index == 0  # chain available again
+
     def test_restores_model_and_provider(self):
         agent = _make_agent(
             fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},

From 29b1bd0e20e5848e2be8de431a225174ab6a7fed Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 17:14:45 -0700
Subject: [PATCH 151/218] feat(cli): add `hermes send` to pipe script output to
 any messaging platform (#27188)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces a thin CLI wrapper around the existing send_message_tool so
shell scripts, cron scripts, CI hooks, and monitoring daemons can reuse
the gateway's already-configured platform credentials without
reimplementing each platform's REST client.

  hermes send --to telegram "deploy finished"
  echo "RAM 92%" | hermes send --to telegram:-1001234567890
  hermes send --to discord:#ops --file report.md
  hermes send --to slack:#eng --subject "[CI]" --file build.log
  hermes send --list                  # all targets
  hermes send --list telegram         # filter by platform

Supports all platforms the send_message tool already does (Telegram,
Discord, Slack, Signal, SMS, WhatsApp, Matrix, Feishu, DingTalk, WeCom,
Weixin, Email, etc.), including threaded targets and #channel-name
resolution via the channel directory.

hermes_cli/send_cmd.py delegates to tools.send_message_tool.send_message_tool,
which means there is zero new platform-specific code. The subcommand just:

1. Bridges ~/.hermes/.env and top-level ~/.hermes/config.yaml scalars into
   os.environ (same bootstrap the gateway does at startup) — required so
   TELEGRAM_HOME_CHANNEL and friends are visible to load_gateway_config().
2. Resolves the message body from positional arg, --file, or piped stdin.
3. Calls the shared tool and translates its JSON result to exit codes:
   0 success, 1 delivery failure, 2 usage error.

No running gateway is required for bot-token platforms (Telegram, Discord,
Slack, Signal, SMS, WhatsApp) — the tool hits each platform's REST API
directly. Plugin platforms that rely on a live adapter connection still
need the gateway running; the error message is forwarded verbatim.

- New guide: website/docs/guides/pipe-script-output.md covering real-world
  patterns (memory watchdogs, CI hooks, cron pipes, long-running task
  completion pings) and the security/gateway notes.
- Cross-links added from automate-with-cron.md ("no LLM? use hermes send")
  and developer-guide/gateway-internals.md (delivery-path section).

tests/hermes_cli/test_send_cmd.py (20 tests, all green):

- Happy paths: positional message, stdin, --file, --file -, --subject,
  --json, --quiet.
- Error paths: missing --to, missing body, file not found, tool returns
  error payload (exit 1), tool skipped-send result (exit 0).
- --list: human output, --json output, platform filter, unknown platform.
- Env loader: bridges config.yaml scalars into env, does not override
  existing env vars, gracefully handles missing files.
- Registrar contract: register_send_subparser() returns a working parser.

Smoke-tested end-to-end against a live Telegram bot before commit.
---
 hermes_cli/main.py                            |   6 +
 hermes_cli/send_cmd.py                        | 445 ++++++++++++++++++
 tests/hermes_cli/test_send_cmd.py             | 387 +++++++++++++++
 .../docs/developer-guide/gateway-internals.md |   2 +-
 website/docs/guides/automate-with-cron.md     |   5 +-
 website/docs/guides/pipe-script-output.md     | 249 ++++++++++
 6 files changed, 1091 insertions(+), 3 deletions(-)
 create mode 100644 hermes_cli/send_cmd.py
 create mode 100644 tests/hermes_cli/test_send_cmd.py
 create mode 100644 website/docs/guides/pipe-script-output.md

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index a893ee85846..bd8fe6c5cff 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -10119,6 +10119,12 @@ def main():
     )
     slack_parser.set_defaults(func=cmd_slack)
 
+    # =========================================================================
+    # send command — pipe shell-script output to any configured platform
+    # =========================================================================
+    from hermes_cli.send_cmd import register_send_subparser
+    register_send_subparser(subparsers)
+
     # =========================================================================
     # login command
     # =========================================================================
diff --git a/hermes_cli/send_cmd.py b/hermes_cli/send_cmd.py
new file mode 100644
index 00000000000..451bb3b4964
--- /dev/null
+++ b/hermes_cli/send_cmd.py
@@ -0,0 +1,445 @@
+"""CLI subcommand: ``hermes send`` — pipe text from shell scripts to any
+configured messaging platform (Telegram, Discord, Slack, Signal, SMS, etc.).
+
+This is a thin wrapper around ``tools.send_message_tool.send_message_tool``
+that exposes its functionality as a standalone CLI entry point so ops
+scripts, cron jobs, CI hooks, and monitoring daemons can reuse the gateway's
+already-configured credentials without having to reimplement each platform's
+REST API client.
+
+Design notes:
+
+* No LLM, no agent loop — the subcommand just resolves arguments, reads the
+  message body, calls the shared tool function, and prints/returns the
+  result. It is intentionally fast, cheap, and side-effect-only.
+* For platforms that send via bot token (Telegram, Discord, Slack, Signal,
+  SMS, WhatsApp-CloudAPI, …) no running gateway is required. The tool
+  talks directly to each platform's REST endpoint. For platforms that rely
+  on a persistent adapter connection (plugin platforms, Matrix in some
+  modes, …) a live gateway is needed; the underlying tool surfaces that
+  error to the caller.
+* Exit codes follow the classic Unix convention:
+    0 — delivery (or list) succeeded
+    1 — delivery failed at the platform level
+    2 — usage / argument / config error (argparse already uses 2)
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Optional
+
+
+_USAGE_EXIT = 2
+_FAILURE_EXIT = 1
+_SUCCESS_EXIT = 0
+
+
+def _read_message_body(
+    positional: Optional[str],
+    file_path: Optional[str],
+) -> Optional[str]:
+    """Resolve the message body from (in order):
+
+    1. An explicit positional message argument.
+    2. ``--file PATH`` or ``--file -`` (where ``-`` means stdin).
+    3. Piped stdin when it is not attached to a TTY.
+
+    Returns ``None`` when nothing is available — callers must treat that as
+    a usage error.
+    """
+    if positional:
+        return positional
+
+    if file_path:
+        if file_path == "-":
+            return sys.stdin.read()
+        try:
+            return Path(file_path).read_text()
+        except OSError as exc:
+            print(f"hermes send: cannot read {file_path}: {exc}", file=sys.stderr)
+            sys.exit(_USAGE_EXIT)
+
+    # Piped input: only consume stdin when it is not a TTY. Reading from a
+    # TTY would block the user in a half-broken "type your message" state,
+    # which is a poor default for an ops CLI.
+    if not sys.stdin.isatty():
+        data = sys.stdin.read()
+        if data:
+            return data
+
+    return None
+
+
+def _resolve_target(arg_to: Optional[str]) -> Optional[str]:
+    """Return a cleaned ``--to`` value, or ``None`` when nothing is set."""
+    if arg_to and arg_to.strip():
+        return arg_to.strip()
+    return None
+
+
+def _emit_result(
+    result_json: str,
+    *,
+    json_mode: bool,
+    quiet: bool,
+) -> int:
+    """Print the tool result in the requested format and return the exit code.
+
+    The underlying ``send_message_tool`` always returns a JSON string. We
+    parse it, decide success/failure, and format accordingly.
+    """
+    try:
+        payload = json.loads(result_json) if result_json else {}
+    except json.JSONDecodeError:
+        # Shouldn't happen with the shared tool, but be defensive — pass the
+        # raw string through so the user can still see what went wrong.
+        payload = {"error": "invalid JSON from send_message_tool", "raw": result_json}
+
+    if json_mode:
+        print(json.dumps(payload, indent=2))
+    elif quiet:
+        pass
+    else:
+        if payload.get("error"):
+            print(f"hermes send: {payload['error']}", file=sys.stderr)
+        elif payload.get("success"):
+            note = payload.get("note")
+            if note:
+                print(note)
+            else:
+                print("sent")
+        else:
+            # Unknown shape — dump it so nothing is silently dropped.
+            print(json.dumps(payload, indent=2))
+
+    if payload.get("error"):
+        return _FAILURE_EXIT
+    if payload.get("skipped"):
+        return _SUCCESS_EXIT
+    if payload.get("success"):
+        return _SUCCESS_EXIT
+    # Unknown / unexpected — treat as failure so scripts notice.
+    return _FAILURE_EXIT
+
+
+def _list_targets(platform_filter: Optional[str], *, json_mode: bool) -> int:
+    """Print the channel directory (all configured targets across platforms).
+
+    Uses ``load_directory()`` for structured JSON output and
+    ``format_directory_for_display()`` for the human-readable rendering that
+    the send_message tool itself shows to the model — keeps the two surfaces
+    identical.
+    """
+    try:
+        from gateway.channel_directory import (
+            format_directory_for_display,
+            load_directory,
+        )
+    except Exception as exc:
+        print(f"hermes send: failed to load channel directory: {exc}", file=sys.stderr)
+        return _FAILURE_EXIT
+
+    try:
+        raw = load_directory()
+    except Exception as exc:
+        print(f"hermes send: failed to read channel directory: {exc}", file=sys.stderr)
+        return _FAILURE_EXIT
+
+    platforms = dict(raw.get("platforms") or {})
+
+    if platform_filter:
+        key = platform_filter.strip().lower()
+        filtered = {k: v for k, v in platforms.items() if k.lower() == key}
+        if not filtered:
+            print(
+                f"hermes send: no targets found for platform '{platform_filter}'. "
+                f"Configured: {', '.join(sorted(platforms)) or '(none)'}",
+                file=sys.stderr,
+            )
+            return _FAILURE_EXIT
+        platforms = filtered
+
+    if json_mode:
+        print(json.dumps({"platforms": platforms}, indent=2, default=str))
+        return _SUCCESS_EXIT
+
+    if not any(platforms.values()):
+        print("No messaging platforms configured or no channels discovered yet.")
+        print("Set one up with `hermes gateway setup`, or run the gateway once so")
+        print("channel discovery can populate ~/.hermes/channel_directory.json.")
+        return _SUCCESS_EXIT
+
+    # Human display — when unfiltered, reuse the shared formatter the agent
+    # already sees. When filtered, build a minimal view ourselves.
+    if platform_filter is None:
+        print(format_directory_for_display())
+        return _SUCCESS_EXIT
+
+    for plat_name in sorted(platforms):
+        channels = platforms[plat_name]
+        print(f"{plat_name}:")
+        if not channels:
+            print("  (no channels discovered yet)")
+            continue
+        for ch in channels:
+            name = ch.get("name", "?")
+            chat_id = ch.get("id") or ch.get("chat_id") or ""
+            suffix = f"  [{chat_id}]" if chat_id and chat_id != name else ""
+            print(f"  {plat_name}:{name}{suffix}")
+        print()
+
+    return _SUCCESS_EXIT
+
+
+def _load_hermes_env() -> None:
+    """Populate ``os.environ`` from ``~/.hermes/.env`` AND bridge top-level
+    ``config.yaml`` keys into the environment so the underlying gateway
+    config loader sees platform credentials and home channel IDs.
+
+    ``send_message_tool`` reads tokens and home-channel IDs via
+    ``os.getenv(...)`` on each call. The gateway process does two things at
+    startup that ``hermes send`` must replicate when invoked standalone:
+
+    1. ``load_dotenv(~/.hermes/.env)`` — brings bot tokens into the env.
+    2. Bridge top-level simple values from ``~/.hermes/config.yaml`` into
+       ``os.environ`` (without overriding existing env vars). This is where
+       ``TELEGRAM_HOME_CHANNEL`` and friends live when the user saved them
+       via ``hermes config set``.
+
+    See ``gateway/run.py`` for the canonical version of this bridge — we
+    intentionally reimplement the minimum needed here so ``hermes send``
+    doesn't pull in the full gateway module just to resolve a home channel.
+    """
+    # Step 1: dotenv
+    try:
+        from dotenv import load_dotenv
+    except Exception:
+        load_dotenv = None  # type: ignore[assignment]
+
+    try:
+        from hermes_cli.config import get_hermes_home
+        home = get_hermes_home()
+    except Exception:
+        return
+
+    env_path = home / ".env"
+    if load_dotenv and env_path.exists():
+        try:
+            load_dotenv(str(env_path), override=True, encoding="utf-8")
+        except UnicodeDecodeError:
+            try:
+                load_dotenv(str(env_path), override=True, encoding="latin-1")
+            except Exception:
+                pass
+        except Exception:
+            pass
+
+    # Step 2: bridge top-level config.yaml values into the environment so
+    # gateway.config.load_gateway_config() sees them. Scalars only; don't
+    # override values already in the env.
+    import os
+    config_path = home / "config.yaml"
+    if not config_path.exists():
+        return
+
+    try:
+        import yaml  # type: ignore[import-not-found]
+    except Exception:
+        return
+
+    try:
+        with open(config_path, "r", encoding="utf-8") as fh:
+            raw = yaml.safe_load(fh) or {}
+    except Exception:
+        return
+
+    try:
+        from hermes_cli.config import _expand_env_vars
+        raw = _expand_env_vars(raw)
+    except Exception:
+        pass
+
+    if not isinstance(raw, dict):
+        return
+
+    for key, val in raw.items():
+        if not isinstance(val, (str, int, float, bool)):
+            continue
+        if key in os.environ:
+            continue
+        os.environ[key] = str(val)
+
+
+def cmd_send(args: argparse.Namespace) -> None:
+    """Entry point wired into the top-level argparse dispatcher."""
+
+    # Bridge ~/.hermes/.env and ~/.hermes/config.yaml into os.environ so the
+    # gateway config loader (invoked downstream by send_message_tool and by
+    # the channel directory) can see platform credentials and home channels.
+    _load_hermes_env()
+
+    # --list short-circuits everything else.
+    if getattr(args, "list_targets", False):
+        # When `--list telegram` is used, argparse stores "telegram" in the
+        # `message` positional (since list_targets takes no argument).
+        platform_filter = getattr(args, "message", None)
+        exit_code = _list_targets(platform_filter, json_mode=getattr(args, "json", False))
+        sys.exit(exit_code)
+
+    target = _resolve_target(getattr(args, "to", None))
+    if not target:
+        print(
+            "hermes send: --to PLATFORM[:channel[:thread]] is required\n"
+            "Examples:\n"
+            "  hermes send --to telegram \"hello\"\n"
+            "  hermes send --to discord:#ops --file report.md\n"
+            "  hermes send --list      # list available targets",
+            file=sys.stderr,
+        )
+        sys.exit(_USAGE_EXIT)
+
+    message = _read_message_body(
+        getattr(args, "message", None),
+        getattr(args, "file", None),
+    )
+    if message is None or not message.strip():
+        print(
+            "hermes send: no message provided. Pass text as a positional "
+            "argument, use --file PATH, or pipe data via stdin.",
+            file=sys.stderr,
+        )
+        sys.exit(_USAGE_EXIT)
+
+    # Optional: prepend a subject line. Useful for alerting scripts that
+    # want a consistent header without inlining it into every call.
+    subject = getattr(args, "subject", None)
+    if subject:
+        message = f"{subject}\n\n{message.lstrip()}"
+
+    # Import lazily so `hermes send --help` stays fast and does not pull in
+    # the full tool registry / gateway config stack.
+    from tools.send_message_tool import send_message_tool
+
+    # send_message_tool auto-loads gateway config + env and routes to the
+    # appropriate platform adapter (bot-token path for Telegram/Discord/Slack/
+    # Signal/SMS/WhatsApp; live-adapter path for plugin platforms).
+    #
+    # It expects the standard tool-call dict and returns a JSON string.
+    tool_args = {
+        "action": "send",
+        "target": target,
+        "message": message,
+    }
+
+    result = send_message_tool(tool_args)
+    exit_code = _emit_result(
+        result,
+        json_mode=getattr(args, "json", False),
+        quiet=getattr(args, "quiet", False),
+    )
+    sys.exit(exit_code)
+
+
+def register_send_subparser(subparsers) -> argparse.ArgumentParser:
+    """Create the ``send`` subparser and return it.
+
+    Kept as a standalone function so the top-level parser builder can wire
+    it in next to the other messaging subcommands without cluttering
+    ``_parser.py`` or ``main.py``.
+    """
+    parser = subparsers.add_parser(
+        "send",
+        help="Send a message to a configured platform (scripts, cron jobs, CI).",
+        description=(
+            "Pipe text from any shell script to any messaging platform Hermes "
+            "is already configured for. Reuses the gateway's platform "
+            "credentials (~/.hermes/.env + ~/.hermes/config.yaml) — no LLM, "
+            "no agent loop, no running gateway required for bot-token "
+            "platforms like Telegram/Discord/Slack/Signal."
+        ),
+        epilog=(
+            "Examples:\n"
+            "  hermes send --to telegram \"deploy finished\"\n"
+            "  echo \"RAM 92%\" | hermes send --to telegram:-1001234567890\n"
+            "  hermes send --to discord:#ops --file /tmp/report.md\n"
+            "  hermes send --to slack:#eng --subject \"[CI]\" --file build.log\n"
+            "  hermes send --list                  # all platforms\n"
+            "  hermes send --list telegram         # filter by platform\n"
+            "\n"
+            "Exit codes: 0 ok, 1 delivery/backend error, 2 usage error."
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    parser.add_argument(
+        "-t",
+        "--to",
+        metavar="TARGET",
+        default=None,
+        help=(
+            "Delivery target. Format: 'platform' (home channel), "
+            "'platform:chat_id', 'platform:chat_id:thread_id', or "
+            "'platform:#channel-name'. Examples: telegram, "
+            "telegram:-1001234567890:17585, discord:#ops, slack:C0123ABCD, "
+            "signal:+15551234567."
+        ),
+    )
+
+    parser.add_argument(
+        "message",
+        nargs="?",
+        default=None,
+        help="Message text. If omitted, read from --file or stdin.",
+    )
+
+    # Legacy / convenience positional removed — use --to for clarity.
+
+    parser.add_argument(
+        "-f",
+        "--file",
+        metavar="PATH",
+        default=None,
+        help="Read message body from PATH. Use '-' to force stdin.",
+    )
+
+    parser.add_argument(
+        "-s",
+        "--subject",
+        metavar="LINE",
+        default=None,
+        help="Prepend a subject/header line before the message body.",
+    )
+
+    parser.add_argument(
+        "-l",
+        "--list",
+        dest="list_targets",
+        action="store_true",
+        default=False,
+        help="List available targets. Optional positional filter: `hermes send --list telegram`.",
+    )
+
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        action="store_true",
+        default=False,
+        help="Suppress stdout on success (exit code only).",
+    )
+
+    parser.add_argument(
+        "--json",
+        action="store_true",
+        default=False,
+        help="Emit raw JSON result instead of human-readable output.",
+    )
+
+    parser.set_defaults(func=cmd_send)
+    return parser
+
+
+__all__ = ["cmd_send", "register_send_subparser"]
diff --git a/tests/hermes_cli/test_send_cmd.py b/tests/hermes_cli/test_send_cmd.py
new file mode 100644
index 00000000000..9202315e3d4
--- /dev/null
+++ b/tests/hermes_cli/test_send_cmd.py
@@ -0,0 +1,387 @@
+"""Tests for the ``hermes send`` CLI subcommand.
+
+Covers the argument parsing / stdin / file / list behavior of
+``hermes_cli.send_cmd``. The underlying ``send_message_tool`` is stubbed so
+no network I/O or gateway is required.
+"""
+
+from __future__ import annotations
+
+import io
+import json
+from pathlib import Path
+
+import pytest
+
+from hermes_cli import send_cmd
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _parse(argv):
+    """Build the top-level parser and return the parsed args for ``argv``."""
+    import argparse
+
+    parser = argparse.ArgumentParser(prog="hermes")
+    subparsers = parser.add_subparsers(dest="command")
+    send_cmd.register_send_subparser(subparsers)
+    return parser.parse_args(["send", *argv])
+
+
+class _FakeTool:
+    """Replacement for ``tools.send_message_tool.send_message_tool``."""
+
+    def __init__(self, payload):
+        self.payload = payload
+        self.calls = []
+
+    def __call__(self, args, **_kw):
+        self.calls.append(dict(args))
+        return json.dumps(self.payload)
+
+
+@pytest.fixture
+def fake_tool(monkeypatch):
+    """Install a fake send_message_tool and return the stub for inspection."""
+    import sys
+    import types
+
+    fake = _FakeTool({"success": True, "message_id": "m123"})
+
+    mod = types.ModuleType("tools.send_message_tool")
+    mod.send_message_tool = fake
+    # Register the stub so ``from tools.send_message_tool import ...`` inside
+    # cmd_send resolves to our fake. Also patch the parent ``tools`` package
+    # entry so attribute lookup works.
+    monkeypatch.setitem(sys.modules, "tools.send_message_tool", mod)
+    return fake
+
+
+# ---------------------------------------------------------------------------
+# Happy path
+# ---------------------------------------------------------------------------
+
+
+def test_positional_message_success(fake_tool, capsys):
+    args = _parse(["--to", "telegram", "hello world"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    assert fake_tool.calls == [
+        {"action": "send", "target": "telegram", "message": "hello world"}
+    ]
+    out = capsys.readouterr()
+    assert "sent" in out.out or out.out == ""  # "sent" is the default success banner
+
+
+def test_stdin_message(fake_tool, monkeypatch, capsys):
+    # Piped stdin (not a tty) should be consumed as the message body.
+    monkeypatch.setattr("sys.stdin", io.StringIO("piped body\n"))
+    # Force isatty to return False so the CLI reads from stdin.
+    monkeypatch.setattr("sys.stdin.isatty", lambda: False)
+    args = _parse(["--to", "discord:#ops"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    assert fake_tool.calls[0]["message"] == "piped body\n"
+    assert fake_tool.calls[0]["target"] == "discord:#ops"
+
+
+def test_file_message(fake_tool, tmp_path):
+    body = tmp_path / "msg.txt"
+    body.write_text("from a file\n")
+    args = _parse(["--to", "slack:#eng", "--file", str(body)])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    assert fake_tool.calls[0]["message"] == "from a file\n"
+
+
+def test_file_dash_means_stdin(fake_tool, monkeypatch):
+    monkeypatch.setattr("sys.stdin", io.StringIO("dash body"))
+    args = _parse(["--to", "telegram", "--file", "-"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    assert fake_tool.calls[0]["message"] == "dash body"
+
+
+def test_subject_prepends_header(fake_tool):
+    args = _parse(["--to", "telegram", "--subject", "[CI]", "body text"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    assert fake_tool.calls[0]["message"] == "[CI]\n\nbody text"
+
+
+def test_json_mode_emits_payload(fake_tool, capsys):
+    args = _parse(["--to", "telegram", "--json", "hi"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    out = capsys.readouterr().out
+    payload = json.loads(out)
+    assert payload.get("success") is True
+    assert payload.get("message_id") == "m123"
+
+
+def test_quiet_suppresses_stdout(fake_tool, capsys):
+    args = _parse(["--to", "telegram", "--quiet", "shh"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    out = capsys.readouterr()
+    assert out.out == ""
+
+
+# ---------------------------------------------------------------------------
+# Error paths
+# ---------------------------------------------------------------------------
+
+
+def test_missing_target(fake_tool, capsys, monkeypatch):
+    # Ensure stdin is a tty so the CLI does not try to consume it as a body.
+    monkeypatch.setattr("sys.stdin.isatty", lambda: True)
+    args = _parse(["hello"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 2
+    err = capsys.readouterr().err
+    assert "--to" in err
+
+
+def test_missing_message(fake_tool, capsys, monkeypatch):
+    monkeypatch.setattr("sys.stdin.isatty", lambda: True)
+    args = _parse(["--to", "telegram"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 2
+    err = capsys.readouterr().err
+    assert "no message" in err.lower()
+
+
+def test_file_not_found_is_usage_error(fake_tool, capsys, monkeypatch):
+    monkeypatch.setattr("sys.stdin.isatty", lambda: True)
+    args = _parse(["--to", "telegram", "--file", "/nonexistent/does-not-exist.txt"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 2
+    err = capsys.readouterr().err
+    assert "cannot read" in err.lower()
+
+
+def test_tool_error_returns_failure_exit(monkeypatch, capsys):
+    import sys as _sys
+    import types as _types
+
+    fake_mod = _types.ModuleType("tools.send_message_tool")
+
+    def _bad_tool(args, **_kw):
+        return json.dumps({"error": "platform blew up"})
+
+    fake_mod.send_message_tool = _bad_tool
+    monkeypatch.setitem(_sys.modules, "tools.send_message_tool", fake_mod)
+
+    args = _parse(["--to", "telegram", "nope"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 1
+    err = capsys.readouterr().err
+    assert "platform blew up" in err
+
+
+def test_skipped_result_is_success(monkeypatch):
+    import sys as _sys
+    import types as _types
+
+    fake_mod = _types.ModuleType("tools.send_message_tool")
+    fake_mod.send_message_tool = lambda args, **_kw: json.dumps(
+        {"success": True, "skipped": True, "reason": "duplicate"}
+    )
+    monkeypatch.setitem(_sys.modules, "tools.send_message_tool", fake_mod)
+
+    args = _parse(["--to", "telegram", "dup"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+
+
+# ---------------------------------------------------------------------------
+# --list
+# ---------------------------------------------------------------------------
+
+
+def test_list_human_output(monkeypatch, capsys):
+    import sys as _sys
+    import types as _types
+
+    fake_dir = _types.ModuleType("gateway.channel_directory")
+    fake_dir.format_directory_for_display = lambda: "Available messaging targets:\n\nTelegram:\n  telegram:-100123\n"
+    fake_dir.load_directory = lambda: {
+        "platforms": {"telegram": [{"id": "-100123", "name": "Test Group"}]}
+    }
+    monkeypatch.setitem(_sys.modules, "gateway.channel_directory", fake_dir)
+
+    args = _parse(["--list"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    out = capsys.readouterr().out
+    assert "Telegram" in out
+
+
+def test_list_json(monkeypatch, capsys):
+    import sys as _sys
+    import types as _types
+
+    fake_dir = _types.ModuleType("gateway.channel_directory")
+    fake_dir.format_directory_for_display = lambda: "(ignored in json mode)"
+    fake_dir.load_directory = lambda: {
+        "platforms": {"telegram": [{"id": "-100123", "name": "Test Group"}]}
+    }
+    monkeypatch.setitem(_sys.modules, "gateway.channel_directory", fake_dir)
+
+    args = _parse(["--list", "--json"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    out = capsys.readouterr().out
+    payload = json.loads(out)
+    assert payload["platforms"]["telegram"][0]["name"] == "Test Group"
+
+
+def test_list_filter_platform(monkeypatch, capsys):
+    import sys as _sys
+    import types as _types
+
+    fake_dir = _types.ModuleType("gateway.channel_directory")
+    fake_dir.format_directory_for_display = lambda: "(should not be called when filter set)"
+    fake_dir.load_directory = lambda: {
+        "platforms": {
+            "telegram": [{"id": "-100123", "name": "TG Chat"}],
+            "discord": [{"id": "555", "name": "bot-home"}],
+        }
+    }
+    monkeypatch.setitem(_sys.modules, "gateway.channel_directory", fake_dir)
+
+    # When --list is set, argparse puts the optional bareword in the
+    # `message` positional slot (where the send-mode body would go).
+    args = _parse(["--list", "telegram"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 0
+    out = capsys.readouterr().out
+    assert "telegram" in out.lower()
+    assert "discord" not in out.lower()
+
+
+def test_list_unknown_platform_fails(monkeypatch, capsys):
+    import sys as _sys
+    import types as _types
+
+    fake_dir = _types.ModuleType("gateway.channel_directory")
+    fake_dir.format_directory_for_display = lambda: ""
+    fake_dir.load_directory = lambda: {"platforms": {"telegram": []}}
+    monkeypatch.setitem(_sys.modules, "gateway.channel_directory", fake_dir)
+
+    args = _parse(["--list", "pigeon-post"])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 1
+    err = capsys.readouterr().err
+    assert "pigeon-post" in err
+
+
+# ---------------------------------------------------------------------------
+# Parser registration contract
+# ---------------------------------------------------------------------------
+
+
+def test_register_send_subparser_is_reusable():
+    """Sanity check: the registrar returns a parser and wires ``cmd_send``."""
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers(dest="command")
+    send_parser = send_cmd.register_send_subparser(subparsers)
+    assert send_parser is not None
+    args = parser.parse_args(["send", "--to", "telegram", "hi"])
+    assert args.func is send_cmd.cmd_send
+    assert args.to == "telegram"
+    assert args.message == "hi"
+
+
+# ---------------------------------------------------------------------------
+# Env loader
+# ---------------------------------------------------------------------------
+
+
+def test_load_hermes_env_bridges_config_yaml_scalars(tmp_path, monkeypatch):
+    """Top-level config.yaml scalars should be bridged into os.environ.
+
+    This mirrors the gateway/run.py bootstrap behavior: without this, running
+    ``hermes send`` from a fresh shell cannot resolve the home channel
+    because ``TELEGRAM_HOME_CHANNEL`` (saved by ``hermes config set``) lives
+    in config.yaml, not in .env — and the gateway's config loader reads via
+    ``os.getenv(...)``.
+    """
+    import os
+
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    (hermes_home / ".env").write_text("SOME_TOKEN=abc123\n")
+    (hermes_home / "config.yaml").write_text(
+        "TELEGRAM_HOME_CHANNEL: '5550001111'\nnested:\n  ignored: true\n"
+    )
+
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.delenv("TELEGRAM_HOME_CHANNEL", raising=False)
+    monkeypatch.delenv("SOME_TOKEN", raising=False)
+
+    # Force get_hermes_home() to re-resolve under the patched env.
+    from importlib import reload
+
+    import hermes_cli.config as _hc_config
+    reload(_hc_config)
+
+    send_cmd._load_hermes_env()
+
+    assert os.environ.get("SOME_TOKEN") == "abc123"
+    assert os.environ.get("TELEGRAM_HOME_CHANNEL") == "5550001111"
+
+
+def test_load_hermes_env_does_not_override_existing(tmp_path, monkeypatch):
+    """Existing env vars must not be clobbered by config.yaml values."""
+    import os
+
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    (hermes_home / "config.yaml").write_text("TELEGRAM_HOME_CHANNEL: yaml_value\n")
+
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "env_value")
+
+    from importlib import reload
+    import hermes_cli.config as _hc_config
+    reload(_hc_config)
+
+    send_cmd._load_hermes_env()
+
+    assert os.environ.get("TELEGRAM_HOME_CHANNEL") == "env_value"
+
+
+def test_load_hermes_env_handles_missing_files(tmp_path, monkeypatch):
+    """No .env or config.yaml should be a silent no-op, not an exception."""
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    from importlib import reload
+    import hermes_cli.config as _hc_config
+    reload(_hc_config)
+
+    # Should not raise.
+    send_cmd._load_hermes_env()
diff --git a/website/docs/developer-guide/gateway-internals.md b/website/docs/developer-guide/gateway-internals.md
index d0521d4816d..ebbe6c0e970 100644
--- a/website/docs/developer-guide/gateway-internals.md
+++ b/website/docs/developer-guide/gateway-internals.md
@@ -186,7 +186,7 @@ Outgoing deliveries (`gateway/delivery.py`) handle:
 
 - **Direct reply** — send response back to the originating chat
 - **Home channel delivery** — route cron job outputs and background results to a configured home channel
-- **Explicit target delivery** — `send_message` tool specifying `telegram:-1001234567890`
+- **Explicit target delivery** — `send_message` tool specifying `telegram:-1001234567890`, or the [`hermes send` CLI](/docs/guides/pipe-script-output) wrapping the same tool for shell scripts
 - **Cross-platform delivery** — deliver to a different platform than the originating message
 
 Cron job deliveries are NOT mirrored into gateway session history — they live in their own cron session only. This is a deliberate design choice to avoid message alternation violations.
diff --git a/website/docs/guides/automate-with-cron.md b/website/docs/guides/automate-with-cron.md
index 46becd88574..aa4fbee1ca2 100644
--- a/website/docs/guides/automate-with-cron.md
+++ b/website/docs/guides/automate-with-cron.md
@@ -14,8 +14,9 @@ For the full feature reference, see [Scheduled Tasks (Cron)](/docs/user-guide/fe
 Cron jobs run in fresh agent sessions with no memory of your current chat. Prompts must be **completely self-contained** — include everything the agent needs to know.
 :::
 
-:::tip Don't need the LLM? Use no-agent mode.
-For recurring watchdogs where the script already produces the exact message you want to send (memory alerts, disk alerts, CI pings, heartbeats), skip the LLM entirely with [script-only cron jobs](/docs/guides/cron-script-only). Zero tokens, same scheduler. You can ask Hermes to set one up for you in chat — the `cronjob` tool knows when to pick `no_agent=True` and writes the script for you.
+:::tip Don't need the LLM? You have two zero-token options.
+- **Recurring watchdog** where the script already produces the exact message (memory alerts, disk alerts, heartbeats): use [script-only cron jobs](/docs/guides/cron-script-only). Same scheduler, no LLM. You can ask Hermes to set one up for you in chat — the `cronjob` tool knows when to pick `no_agent=True` and writes the script for you.
+- **One-shot from a script that's already running** (CI step, post-commit hook, deploy script, externally-scheduled monitor): use [`hermes send`](/docs/guides/pipe-script-output) to pipe stdout or a file straight to Telegram / Discord / Slack / etc. without setting up a cron entry.
 :::
 
 ---
diff --git a/website/docs/guides/pipe-script-output.md b/website/docs/guides/pipe-script-output.md
new file mode 100644
index 00000000000..483d45206a3
--- /dev/null
+++ b/website/docs/guides/pipe-script-output.md
@@ -0,0 +1,249 @@
+---
+sidebar_position: 12
+title: "Pipe Script Output to Messaging Platforms"
+description: "Send text from any shell script, cron job, CI hook, or monitoring daemon to Telegram, Discord, Slack, Signal, and other platforms using `hermes send`."
+---
+
+# Pipe Script Output to Messaging Platforms
+
+`hermes send` is a small, scriptable CLI that pushes a message to any
+messaging platform Hermes is already configured for. Think of it as a
+cross-platform `curl` for notifications — you don't need a running
+gateway, you don't need an LLM, and you don't need to re-paste bot tokens
+into each of your scripts.
+
+Use it for:
+
+- System monitoring (memory, disk, GPU temp, long-running job finished)
+- CI/CD notifications (deploy done, test failure)
+- Cron scripts that need to ping you with results
+- Quick one-shot messages from a terminal
+- Piping any tool's output anywhere (`make | hermes send --to slack:#builds`)
+
+The command reuses the same credentials and platform adapters that `hermes
+gateway` already uses, so there's no second configuration surface to
+maintain.
+
+---
+
+## Quick Start
+
+```bash
+# Plain text to the home channel for a platform
+hermes send --to telegram "deploy finished"
+
+# Pipe in stdout from anything
+echo "RAM 92%" | hermes send --to telegram:-1001234567890
+
+# Send a file
+hermes send --to discord:#ops --file /tmp/report.md
+
+# Attach a subject/header line
+hermes send --to slack:#eng --subject "[CI] build.log" --file build.log
+
+# Thread target (Telegram topic, Discord thread)
+hermes send --to telegram:-1001234567890:17585 "threaded reply"
+
+# List every configured target
+hermes send --list
+
+# Filter by platform
+hermes send --list telegram
+```
+
+---
+
+## Argument Reference
+
+| Flag | Description |
+|------|-------------|
+| `-t, --to TARGET` | Destination. See [target formats](#target-formats). |
+| `message` (positional) | Message text. Omit to read from `--file` or stdin. |
+| `-f, --file PATH` | Read the body from a file. `--file -` forces stdin. |
+| `-s, --subject LINE` | Prepend a header/subject line before the body. |
+| `-l, --list` | List available targets. Optional positional platform filter. |
+| `-q, --quiet` | No stdout on success (exit code only — ideal for scripts). |
+| `--json` | Emit the raw JSON result of the send. |
+| `-h, --help` | Show the built-in help text. |
+
+### Target Formats
+
+| Format | Example | Meaning |
+|--------|---------|---------|
+| `platform` | `telegram` | Send to the platform's configured home channel |
+| `platform:chat_id` | `telegram:-1001234567890` | Specific numeric chat / group / user |
+| `platform:chat_id:thread_id` | `telegram:-1001234567890:17585` | Specific thread or Telegram forum topic |
+| `platform:#channel` | `discord:#ops` | Human-friendly channel name (resolved against the channel directory) |
+| `platform:+E164` | `signal:+15551234567` | Phone-addressed platforms: Signal, SMS, WhatsApp |
+
+Any platform Hermes ships adapters for works as a target:
+`telegram`, `discord`, `slack`, `signal`, `sms`, `whatsapp`, `matrix`,
+`mattermost`, `feishu`, `dingtalk`, `wecom`, `weixin`, `email`, and
+others.
+
+### Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| `0` | Send (or list) succeeded |
+| `1` | Delivery failed at the platform level (auth, permissions, network) |
+| `2` | Usage / argument / config error |
+
+Exit codes follow the standard Unix convention so your scripts can
+branch on them the same way they would on `curl` or `grep`.
+
+---
+
+## Message Body Resolution
+
+`hermes send` resolves the message body in this order:
+
+1. **Positional argument** — `hermes send --to telegram "hi"`
+2. **`--file PATH`** — `hermes send --to telegram --file msg.txt`
+3. **Piped stdin** — `echo hi | hermes send --to telegram`
+
+When stdin is a TTY (no pipe), Hermes does **not** wait for input — you'll
+get a clear usage error instead. This keeps scripts from hanging if they
+accidentally omit the body.
+
+---
+
+## Real-World Examples
+
+### Monitoring: Memory / Disk Alerts
+
+Replace ad-hoc `curl https://api.telegram.org/...` calls in your watchdogs
+with a single portable line:
+
+```bash
+#!/usr/bin/env bash
+ram_pct=$(free | awk '/^Mem:/ {printf "%d", $3 * 100 / $2}')
+if [ "$ram_pct" -ge 85 ]; then
+  hermes send --to telegram --subject "⚠ MEMORY WARNING" \
+    "RAM ${ram_pct}% on $(hostname)"
+fi
+```
+
+Because `hermes send` reuses your Hermes config, the same script works on
+any host where Hermes is installed — no need to export bot tokens into
+each machine's environment manually.
+
+:::tip Don't alert the gateway about itself
+For watchdogs that might fire when the gateway itself is struggling (OOM
+alerts, disk-full alerts), keep using a minimal `curl` call instead of
+`hermes send`. If the Python interpreter can't load because the box is
+thrashing, you still want that alert to go out.
+:::
+
+### CI / CD: Build and Test Results
+
+```bash
+# In .github/workflows/deploy.yml or any CI script
+if ./scripts/deploy.sh; then
+  hermes send --to slack:#deploys "✅ ${CI_COMMIT_SHA:0:7} deployed"
+else
+  tail -n 100 deploy.log | hermes send \
+    --to slack:#deploys --subject "❌ deploy failed"
+  exit 1
+fi
+```
+
+### Cron: Daily Report
+
+```bash
+# Crontab entry
+0 9 * * * /usr/local/bin/generate-metrics.sh \
+  | /home/me/.hermes/bin/hermes send \
+      --to telegram --subject "Daily metrics $(date +%Y-%m-%d)"
+```
+
+### Long-Running Tasks: Ping When Done
+
+```bash
+./train.py --epochs 200 && \
+  hermes send --to telegram "training done" || \
+  hermes send --to telegram "training failed (exit $?)"
+```
+
+### Scripting with `--json` and `--quiet`
+
+```bash
+# Hard-fail a script if delivery fails; don't clutter logs on success
+hermes send --to telegram --quiet "keepalive" || {
+  echo "Telegram delivery failed" >&2
+  exit 1
+}
+
+# Capture the message ID for later editing / threading
+msg_id=$(hermes send --to discord:#ops --json "build started" \
+  | jq -r .message_id)
+```
+
+---
+
+## Does `hermes send` Need the Gateway Running?
+
+**Usually no.** For any bot-token platform — Telegram, Discord, Slack,
+Signal, SMS, WhatsApp Cloud API, and most others — `hermes send` calls
+the platform's REST endpoint directly using credentials from
+`~/.hermes/.env` and `~/.hermes/config.yaml`. It's a standalone subprocess
+that exits as soon as the message is delivered.
+
+A live gateway is only required for **plugin platforms** that rely on a
+persistent adapter connection (for example, a custom plugin that keeps
+a long-lived WebSocket open). In that case you'll get a clear error
+pointing at the gateway; start it with `hermes gateway start` and retry.
+
+---
+
+## Listing and Discovering Targets
+
+Before sending to a specific channel, you can inspect what's available:
+
+```bash
+# Every target across every configured platform
+hermes send --list
+
+# Just Telegram targets
+hermes send --list telegram
+
+# Machine-readable
+hermes send --list --json
+```
+
+The listing is built from `~/.hermes/channel_directory.json`, which the
+gateway refreshes every few minutes while it's running. If you see
+"no channels discovered yet", start the gateway once (`hermes gateway
+start`) so it can populate the cache.
+
+Human-friendly names (`discord:#ops`, `slack:#engineering`) are resolved
+against this cache at send time, so you don't need to memorize numeric
+IDs.
+
+---
+
+## Comparison with Other Approaches
+
+| Approach | Multi-platform | Reuses Hermes creds | Needs gateway | Best for |
+|----------|----------------|---------------------|---------------|----------|
+| `hermes send` | ✅ | ✅ | No (bot-token) | Everything below |
+| Raw `curl` to each platform | Each scripted separately | Manual | No | Critical watchdogs |
+| `cron` job with `--deliver` | ✅ | ✅ | No | Scheduled agent tasks |
+| `send_message` agent tool | ✅ | ✅ | No | Inside an agent loop |
+
+`hermes send` is intentionally the simplest possible surface. If you need
+an agent to decide what to say, use the `send_message` tool from within a
+chat or cron job. If you need a scheduled run with LLM-generated content,
+use `cronjob(action='create', prompt=...)` with `deliver='telegram:...'`.
+If you just need to pipe a raw string, reach for `hermes send`.
+
+---
+
+## Related
+
+- [Automate Anything with Cron](/docs/guides/automate-with-cron) —
+  scheduled jobs whose output auto-delivers to any platform.
+- [Gateway Internals](/docs/developer-guide/gateway-internals) —
+  the delivery router that `hermes send` shares with cron delivery.
+- [Messaging Platform Setup](/docs/user-guide/messaging/) —
+  one-time configuration for each platform.

From 9b82586c6b6dd628af273b3c6875e0142f798089 Mon Sep 17 00:00:00 2001
From: Guillaume Meyer <guillaume.meyer@outlook.com>
Date: Sat, 16 May 2026 22:55:28 +0000
Subject: [PATCH 152/218] fix(plugins): surface category-namespaced plugins in
 hermes plugins list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`_discover_all_plugins()` in plugins_cmd.py did a flat scan of the
bundled and user plugin directories — only direct children with a
plugin.yaml were surfaced. Category directories like `observability/`,
`image_gen/`, `platforms/`, `model-providers/`, `web/`, and `video_gen/`
have no plugin.yaml of their own, so their nested plugins
(`observability/langfuse`, `image_gen/openai`, etc.) never appeared in
`hermes plugins list` or the interactive `hermes plugins` UI — even
though the runtime loader (`PluginManager._scan_directory_level`)
discovers them correctly and they do load at runtime.

This broke the documented promise that bundled plugins appear in
`hermes plugins list` and the interactive UI before being enabled,
and made it look like `observability/langfuse` didn't exist.

Refactor `_discover_all_plugins()` to mirror the loader's recursion
(depth cap = 2, same skip set, user overrides bundled on key collision).
Return the path-derived registry key (e.g. `observability/langfuse`) as
the displayed name, matching what the user passes to
`hermes plugins enable …` / writes under `plugins.enabled` in
config.yaml.

Also clarify the plugins docs: spell out that sub-category plugins
surface by their `<category>/<plugin>` key in `hermes plugins list` /
interactive UI, add an `observability/langfuse` example to the command
reference, and include a nested entry in the interactive-UI mock.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hermes_cli/plugins_cmd.py                   | 86 +++++++++++++--------
 website/docs/user-guide/features/plugins.md | 24 +++---
 2 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py
index 675989d170e..6fa3c59c7a3 100644
--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@@ -708,55 +708,79 @@ def _plugin_exists(name: str) -> bool:
 
 
 def _discover_all_plugins() -> list:
-    """Return a list of (name, version, description, source, dir_path) for
-    every plugin the loader can see — user + bundled + project.
+    """Return a list of (key, version, description, source, dir_path) for
+    every plugin the loader can see — user + bundled.
 
-    Matches the ordering/dedup of ``PluginManager.discover_and_load``:
-    bundled first, then user, then project; user overrides bundled on
-    name collision.
+    Mirrors :meth:`PluginManager._scan_directory_level` so category-namespaced
+    plugins (``observability/langfuse``, ``image_gen/openai``) surface here
+    just like flat ones (``disk-cleanup``). A subdirectory with no
+    ``plugin.yaml`` of its own is treated as a category and recursed into
+    one level deeper (depth capped at 2, same as the loader).
+
+    The returned ``key`` is the path-derived registry key — the value the
+    user types into ``hermes plugins enable <key>``. For category-namespaced
+    plugins that's ``<category>/<dirname>``; for flat plugins it's the
+    manifest's ``name`` (or the directory name if the manifest omits it).
+
+    User entries override bundled on key collision, matching
+    ``PluginManager.discover_and_load``.
     """
     try:
         import yaml
     except ImportError:
         yaml = None
 
-    seen: dict = {}  # name -> (name, version, description, source, path)
+    seen: dict = {}  # key -> (key, version, description, source, path)
 
-    # Bundled (<repo>/plugins/<name>/), excluding memory/ and context_engine/
-    from hermes_cli.plugins import get_bundled_plugins_dir
-    repo_plugins = get_bundled_plugins_dir()
-    for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")):
+    def _scan(base: Path, source: str, prefix: str, depth: int) -> None:
         if not base.is_dir():
-            continue
+            return
         for d in sorted(base.iterdir()):
             if not d.is_dir():
                 continue
-            if source == "bundled" and d.name in {"memory", "context_engine"}:
+            if (
+                depth == 0
+                and source == "bundled"
+                and d.name in {"memory", "context_engine"}
+            ):
                 continue
             manifest_file = d / "plugin.yaml"
             if not manifest_file.exists():
                 manifest_file = d / "plugin.yml"
-            if not manifest_file.exists():
+
+            if manifest_file.exists():
+                manifest_name = d.name
+                version = ""
+                description = ""
+                if yaml:
+                    try:
+                        with open(manifest_file, encoding="utf-8") as f:
+                            manifest = yaml.safe_load(f) or {}
+                        manifest_name = manifest.get("name", d.name)
+                        version = manifest.get("version", "")
+                        description = manifest.get("description", "")
+                    except Exception:
+                        pass
+                key = f"{prefix}/{d.name}" if prefix else manifest_name
+                if key in seen and source == "bundled":
+                    continue
+                src_label = source
+                if source == "user" and (d / ".git").exists():
+                    src_label = "git"
+                seen[key] = (key, version, description, src_label, d)
                 continue
-            name = d.name
-            version = ""
-            description = ""
-            if yaml:
-                try:
-                    with open(manifest_file, encoding="utf-8") as f:
-                        manifest = yaml.safe_load(f) or {}
-                    name = manifest.get("name", d.name)
-                    version = manifest.get("version", "")
-                    description = manifest.get("description", "")
-                except Exception:
-                    pass
-            # User plugins override bundled on name collision.
-            if name in seen and source == "bundled":
+
+            # No manifest at this level — treat as a category namespace and
+            # recurse one level deeper. Cap at depth 2 (same as the loader).
+            if depth >= 1:
                 continue
-            src_label = source
-            if source == "user" and (d / ".git").exists():
-                src_label = "git"
-            seen[name] = (name, version, description, src_label, d)
+            sub_prefix = f"{prefix}/{d.name}" if prefix else d.name
+            _scan(d, source, sub_prefix, depth + 1)
+
+    from hermes_cli.plugins import get_bundled_plugins_dir
+    _scan(get_bundled_plugins_dir(), "bundled", "", 0)
+    _scan(_plugins_dir(), "user", "", 0)
+
     return list(seen.values())
 
 
diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md
index e9dc2910889..9572f3538a6 100644
--- a/website/docs/user-guide/features/plugins.md
+++ b/website/docs/user-guide/features/plugins.md
@@ -142,6 +142,8 @@ Within each source, Hermes also recognizes sub-category directories that route p
 
 User plugins at `~/.hermes/plugins/model-providers/<name>/` and `~/.hermes/plugins/memory/<name>/` override bundled plugins of the same name — last-writer-wins in `register_provider()` / `register_memory_provider()`. Drop a directory in, and it replaces the built-in without any repo edits.
 
+Sub-category plugins surface in `hermes plugins list` and the interactive `hermes plugins` UI under their **path-derived key** — e.g. `observability/langfuse`, `image_gen/openai`, `platforms/teams`. That key (not the bare manifest `name:`) is the value you pass to `hermes plugins enable …` / `disable …` and the string to add under `plugins.enabled` in `config.yaml`.
+
 ## Plugins are opt-in (with a few exceptions)
 
 **General plugins and user-installed backends are disabled by default** — discovery finds them (so they show up in `hermes plugins` and `/plugins`), but nothing with hooks or tools loads until you add the plugin's name to `plugins.enabled` in `~/.hermes/config.yaml`. This stops third-party code from running without your explicit consent.
@@ -263,17 +265,20 @@ Declarative plugins are symlinked with a `nix-managed-` prefix — they coexist
 ## Managing plugins
 
 ```bash
-hermes plugins                               # unified interactive UI
-hermes plugins list                          # table: enabled / disabled / not enabled
-hermes plugins install user/repo             # install from Git, then prompt Enable? [y/N]
-hermes plugins install user/repo --enable    # install AND enable (no prompt)
-hermes plugins install user/repo --no-enable # install but leave disabled (no prompt)
-hermes plugins update my-plugin              # pull latest
-hermes plugins remove my-plugin              # uninstall
-hermes plugins enable my-plugin              # add to allow-list
-hermes plugins disable my-plugin             # remove from allow-list + add to disabled
+hermes plugins                                       # unified interactive UI
+hermes plugins list                                  # table: enabled / disabled / not enabled
+hermes plugins install user/repo                     # install from Git, then prompt Enable? [y/N]
+hermes plugins install user/repo --enable            # install AND enable (no prompt)
+hermes plugins install user/repo --no-enable         # install but leave disabled (no prompt)
+hermes plugins update my-plugin                      # pull latest
+hermes plugins remove my-plugin                      # uninstall
+hermes plugins enable my-plugin                      # add to allow-list (flat plugin)
+hermes plugins enable observability/langfuse         # add to allow-list (sub-category plugin)
+hermes plugins disable my-plugin                     # remove from allow-list + add to disabled
 ```
 
+For plugins under a sub-category directory (e.g. `plugins/observability/langfuse/`, `plugins/image_gen/openai/`), use the full `<category>/<plugin>` key — that's exactly what `hermes plugins list` shows in the **Name** column.
+
 ### Interactive UI
 
 Running `hermes plugins` with no arguments opens a composite interactive screen:
@@ -286,6 +291,7 @@ Plugins
  → [✓] my-tool-plugin — Custom search tool
    [ ] webhook-notifier — Event hooks
    [ ] disk-cleanup — Auto-cleanup of ephemeral files [bundled]
+   [ ] observability/langfuse — Trace turns / LLM calls / tools to Langfuse [bundled]
 
   Provider Plugins
      Memory Provider          ▸ honcho

From 8ab8bc2f035ac4ed8b3b43ed2940ba3dc4589cc9 Mon Sep 17 00:00:00 2001
From: Guillaume Meyer <guillaume.meyer@outlook.com>
Date: Sat, 16 May 2026 23:04:42 +0000
Subject: [PATCH 153/218] =?UTF-8?q?fix(plugins):=20remove=20unreachable=20?=
 =?UTF-8?q?hermes=20tools=20=E2=86=92=20Langfuse=20path?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The langfuse plugin is hooks-only (no toolsets), so it never appears in
`hermes tools` — that menu iterates `_get_effective_configurable_toolsets()`
(= `CONFIGURABLE_TOOLSETS` + plugin-registered toolsets), and "langfuse"
is in neither. The `TOOL_CATEGORIES["langfuse"]` setup wizard (with its
`post_setup: "langfuse"` hook that pip-installs the SDK and writes
`plugins.enabled`) was reachable only when a toolset key "langfuse" got
enabled, which can't happen — so it's been dead code, and the docs that
promised "Setup (interactive): hermes tools → Langfuse Observability"
were silently broken.

Right home for that wizard is `hermes plugins` (e.g. auto-running a
plugin's post-setup hook on enable), which is a generic plugin-setup
mechanism worth designing properly rather than shoehorning langfuse
back into `hermes tools`. Until that exists, point users at the
working manual flow.

Code:
- Delete `TOOL_CATEGORIES["langfuse"]` (24 lines) — unreachable.
- Delete the `post_setup_key == "langfuse"` branch in `_run_post_setup`
  (29 lines) — only caller was the deleted TOOL_CATEGORIES entry.

Docs / comments (point at the manual flow + interactive `hermes plugins`):
- `plugins/observability/langfuse/README.md`: collapse the two-option
  setup section to the single working flow.
- `plugins/observability/langfuse/plugin.yaml`: update `description`.
- `plugins/observability/langfuse/__init__.py`: update module docstring.
- `hermes_cli/config.py`: update inline comment above the LANGFUSE_*
  env-var allow-list.
- `website/docs/user-guide/features/built-in-plugins.md`: collapse
  "Setup (interactive)" + "Setup (manual)" into one accurate block.
- `website/docs/reference/environment-variables.md`: update the
  cross-reference in the Langfuse env-vars section.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hermes_cli/config.py                          |  3 +-
 hermes_cli/tools_config.py                    | 55 -------------------
 plugins/observability/langfuse/README.md      | 10 +---
 plugins/observability/langfuse/__init__.py    |  8 +--
 plugins/observability/langfuse/plugin.yaml    |  2 +-
 .../docs/reference/environment-variables.md   |  2 +-
 .../user-guide/features/built-in-plugins.md   | 12 +---
 7 files changed, 12 insertions(+), 80 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 81706d1edb4..c1f68e1c88c 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -134,8 +134,7 @@ _EXTRA_ENV_KEYS = frozenset({
     "MATRIX_RECOVERY_KEY",
     # Langfuse observability plugin — optional tuning keys + standard SDK vars.
     # Activation is via plugins.enabled (opt-in through `hermes plugins enable
-    # observability/langfuse` or `hermes tools → Langfuse`); credentials gate
-    # the plugin at runtime.
+    # observability/langfuse`); credentials gate the plugin at runtime.
     "HERMES_LANGFUSE_ENV",
     "HERMES_LANGFUSE_RELEASE",
     "HERMES_LANGFUSE_SAMPLE_RATE",
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 3afaa5cc7c9..06ba32bea9e 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -461,31 +461,6 @@ TOOL_CATEGORIES = {
             },
         ],
     },
-    "langfuse": {
-        "name": "Langfuse Observability",
-        "icon": "📊",
-        "providers": [
-            {
-                "name": "Langfuse Cloud",
-                "tag": "Hosted Langfuse (cloud.langfuse.com)",
-                "env_vars": [
-                    {"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)", "url": "https://cloud.langfuse.com"},
-                    {"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)", "url": "https://cloud.langfuse.com"},
-                ],
-                "post_setup": "langfuse",
-            },
-            {
-                "name": "Langfuse Self-Hosted",
-                "tag": "Self-hosted Langfuse instance",
-                "env_vars": [
-                    {"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)"},
-                    {"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)"},
-                    {"key": "HERMES_LANGFUSE_BASE_URL", "prompt": "Langfuse server URL (e.g. http://localhost:3000)", "default": "http://localhost:3000"},
-                ],
-                "post_setup": "langfuse",
-            },
-        ],
-    },
 }
 
 # Simple env-var requirements for toolsets NOT in TOOL_CATEGORIES.
@@ -947,36 +922,6 @@ def _run_post_setup(post_setup_key: str):
             _print_warning(f"    Spotify login failed: {exc}")
             _print_info("    Run manually: hermes auth spotify")
 
-    elif post_setup_key == "langfuse":
-        # Install the langfuse SDK.
-        try:
-            __import__("langfuse")
-            _print_success("    langfuse SDK already installed")
-        except ImportError:
-            _print_info("    Installing langfuse SDK...")
-            result = _pip_install(["langfuse", "--quiet"], timeout=120)
-            if result.returncode == 0:
-                _print_success("    langfuse SDK installed")
-            else:
-                _print_warning("    langfuse SDK install failed — run manually: uv pip install langfuse")
-        # Opt the bundled observability/langfuse plugin into plugins.enabled.
-        # The plugin ships in the repo but doesn't load until the user enables
-        # it (standalone plugins are opt-in).
-        try:
-            from hermes_cli.plugins_cmd import _get_enabled_set, _save_enabled_set
-            enabled = _get_enabled_set()
-            if "observability/langfuse" in enabled or "langfuse" in enabled:
-                _print_success("    Plugin observability/langfuse already enabled")
-            else:
-                enabled.add("observability/langfuse")
-                _save_enabled_set(enabled)
-                _print_success("    Plugin observability/langfuse enabled")
-        except Exception as exc:
-            _print_warning(f"    Could not enable plugin automatically: {exc}")
-            _print_info("    Run manually: hermes plugins enable observability/langfuse")
-        _print_info("    Restart Hermes for tracing to take effect.")
-        _print_info("    Verify: hermes plugins list")
-
     elif post_setup_key == "xai_grok":
         # Shared credential bootstrap for any picker entry that talks to xAI
         # (TTS, Video Gen, future Image Gen, etc.). Accepts either a
diff --git a/plugins/observability/langfuse/README.md b/plugins/observability/langfuse/README.md
index 864735d9688..97f4757e5a8 100644
--- a/plugins/observability/langfuse/README.md
+++ b/plugins/observability/langfuse/README.md
@@ -5,20 +5,16 @@ you explicitly enable it.
 
 ## Enable
 
-Pick one:
-
 ```bash
-# Interactive: walks you through credentials + SDK install + enable
-hermes tools  # → Langfuse Observability
-
-# Manual
 pip install langfuse
 hermes plugins enable observability/langfuse
 ```
 
+Or check the box in the interactive `hermes plugins` UI.
+
 ## Required credentials
 
-Set these in `~/.hermes/.env` (or via `hermes tools`):
+Set these in `~/.hermes/.env`:
 
 ```bash
 HERMES_LANGFUSE_PUBLIC_KEY=pk-lf-...
diff --git a/plugins/observability/langfuse/__init__.py b/plugins/observability/langfuse/__init__.py
index 8516030fb01..a99a8eb9279 100644
--- a/plugins/observability/langfuse/__init__.py
+++ b/plugins/observability/langfuse/__init__.py
@@ -4,11 +4,11 @@ Traces Hermes conversations, LLM calls, and tool usage to Langfuse.
 
 Activation is handled by the Hermes plugin system — standalone plugins only
 load when listed in ``plugins.enabled`` (via ``hermes plugins enable
-observability/langfuse`` or ``hermes tools → Langfuse Observability``). At
-runtime the plugin also requires the ``langfuse`` SDK and credentials; if
-either is missing the hooks are inert.
+observability/langfuse``, or by checking the box in the interactive
+``hermes plugins`` UI). At runtime the plugin also requires the
+``langfuse`` SDK and credentials; if either is missing the hooks are inert.
 
-Required env vars (set via ``hermes tools`` or ~/.hermes/.env):
+Required env vars (set in ~/.hermes/.env):
   HERMES_LANGFUSE_PUBLIC_KEY  - Langfuse project public key (pk-lf-...)
   HERMES_LANGFUSE_SECRET_KEY  - Langfuse project secret key (sk-lf-...)
   HERMES_LANGFUSE_BASE_URL    - Langfuse server URL (default: https://cloud.langfuse.com)
diff --git a/plugins/observability/langfuse/plugin.yaml b/plugins/observability/langfuse/plugin.yaml
index 18f1c6245d3..708264c8a96 100644
--- a/plugins/observability/langfuse/plugin.yaml
+++ b/plugins/observability/langfuse/plugin.yaml
@@ -1,6 +1,6 @@
 name: langfuse
 version: "1.0.0"
-description: "Optional Langfuse observability for Hermes — traces conversations, LLM calls, and tool usage. Opt-in via `hermes plugins enable observability/langfuse` or `hermes tools → Langfuse Observability`."
+description: "Optional Langfuse observability for Hermes — traces conversations, LLM calls, and tool usage. Opt-in via `hermes plugins enable observability/langfuse` (or check the box in `hermes plugins`)."
 author: NousResearch
 requires_env:
   - HERMES_LANGFUSE_PUBLIC_KEY
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index 56fe8a13715..4866ac083ac 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -156,7 +156,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 
 ### Langfuse Observability
 
-Environment variables for the bundled [`observability/langfuse`](/docs/user-guide/features/built-in-plugins#observabilitylangfuse) plugin. Set these with `hermes tools → Langfuse Observability` or manually in `~/.hermes/.env`. The plugin must also be enabled (`hermes plugins enable observability/langfuse`) before any of these take effect.
+Environment variables for the bundled [`observability/langfuse`](/docs/user-guide/features/built-in-plugins#observabilitylangfuse) plugin. Set these in `~/.hermes/.env`. The plugin must also be enabled (`hermes plugins enable observability/langfuse`, or check the box in `hermes plugins`) before any of these take effect.
 
 | Variable | Description |
 |----------|-------------|
diff --git a/website/docs/user-guide/features/built-in-plugins.md b/website/docs/user-guide/features/built-in-plugins.md
index aa346308913..8ac3322c68b 100644
--- a/website/docs/user-guide/features/built-in-plugins.md
+++ b/website/docs/user-guide/features/built-in-plugins.md
@@ -121,22 +121,14 @@ Traces Hermes turns, LLM calls, and tool invocations to [Langfuse](https://langf
 
 The plugin is fail-open: no SDK installed, no credentials, or a transient Langfuse error — all turn into a silent no-op in the hook. The agent loop is never impacted.
 
-**Setup (interactive — recommended):**
-
-```bash
-hermes tools          # → Langfuse Observability → Cloud or Self-Hosted
-```
-
-The wizard collects your keys, `pip install`s the `langfuse` SDK, and adds `observability/langfuse` to `plugins.enabled` for you. Restart Hermes and the next turn ships a trace.
-
-**Setup (manual):**
+**Setup:**
 
 ```bash
 pip install langfuse
 hermes plugins enable observability/langfuse
 ```
 
-Then put the credentials in `~/.hermes/.env`:
+Or check the box in the interactive `hermes plugins` UI. Then put the credentials in `~/.hermes/.env`:
 
 ```bash
 HERMES_LANGFUSE_PUBLIC_KEY=pk-lf-...

From 21be7025c584ea9b1d829e088b6049e259c6859a Mon Sep 17 00:00:00 2001
From: Guillaume Meyer <guillaume.meyer@outlook.com>
Date: Sat, 16 May 2026 23:12:18 +0000
Subject: [PATCH 154/218] refactor(plugins): drop dead bundled-source guard in
 _discover_all_plugins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `if key in seen and source == "bundled": continue` check was
unreachable: bundled is scanned before user, so `key in seen` can never
be true while `source == "bundled"`. The "user overrides bundled"
semantics are preserved automatically by the unconditional
`seen[key] = …` on the user pass.

Replaces the dead guard with a one-line comment explaining the
overwrite semantics, so a future contributor adding a third source
(e.g. project plugins) can see at a glance how ordering interacts with
the dict-overwrite. Matches `PluginManager.discover_and_load`'s
"user wins" rule.

Spotted by Copilot in code review on #27161.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hermes_cli/plugins_cmd.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py
index 6fa3c59c7a3..1e1c3282bee 100644
--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@@ -762,11 +762,12 @@ def _discover_all_plugins() -> list:
                     except Exception:
                         pass
                 key = f"{prefix}/{d.name}" if prefix else manifest_name
-                if key in seen and source == "bundled":
-                    continue
                 src_label = source
                 if source == "user" and (d / ".git").exists():
                     src_label = "git"
+                # Bundled is scanned before user, so the user pass overwrites
+                # bundled entries with the same key — matches
+                # PluginManager.discover_and_load's "user wins" semantics.
                 seen[key] = (key, version, description, src_label, d)
                 continue
 

From 5cbe0b1c4ffabf6aeca31827ba9a76ec35e4d4fb Mon Sep 17 00:00:00 2001
From: Guillaume Meyer <guillaume.meyer@outlook.com>
Date: Sat, 16 May 2026 23:14:26 +0000
Subject: [PATCH 155/218] test(plugins): cover _discover_all_plugins recursion
 + cross-link loader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a TestDiscoverAllPlugins class covering the six cases the recursive
scan needs to handle:

- flat plugin uses its manifest ``name:`` as the key
- category-namespaced plugin keys off ``<category>/<dirname>`` even when
  the manifest ``name:`` is bare (regression test for the original bug —
  ``plugins/observability/langfuse/`` with ``name: langfuse`` must
  surface as ``observability/langfuse``, not ``langfuse``)
- user-installed plugin overrides bundled on key collision
- depth cap: anything below ``<root>/<category>/<plugin>/`` is ignored
- bundled ``memory/`` and ``context_engine/`` are skipped (they have
  their own loaders), but user plugins under those category names are
  still scanned

Also add an in-source comment next to the key derivation pointing at the
loader's matching line (``PluginManager._parse_manifest`` in
plugins.py:1027-1028), so future renames of one site flag the other.

Both items raised in Copilot review on #27161.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 hermes_cli/plugins_cmd.py            |   5 ++
 tests/hermes_cli/test_plugins_cmd.py | 111 +++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)

diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py
index 1e1c3282bee..8c002456787 100644
--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@@ -761,6 +761,11 @@ def _discover_all_plugins() -> list:
                         description = manifest.get("description", "")
                     except Exception:
                         pass
+                # Path-derived key, intentionally ignoring the manifest
+                # ``name:`` field for category-namespaced plugins — mirrors
+                # ``PluginManager._parse_manifest`` in plugins.py:1027-1028
+                # so renaming a directory (without touching plugin.yaml) shifts
+                # the registry key in both places consistently.
                 key = f"{prefix}/{d.name}" if prefix else manifest_name
                 src_label = source
                 if source == "user" and (d / ".git").exists():
diff --git a/tests/hermes_cli/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py
index 180646c935d..5a421f018f9 100644
--- a/tests/hermes_cli/test_plugins_cmd.py
+++ b/tests/hermes_cli/test_plugins_cmd.py
@@ -396,6 +396,117 @@ class TestCmdList:
         cmd_list()
 
 
+# ── _discover_all_plugins tests ───────────────────────────────────────────────
+
+
+class TestDiscoverAllPlugins:
+    """Exercise the recursive scan that powers ``hermes plugins list``.
+
+    Mirrors the layouts the runtime loader handles
+    (:meth:`PluginManager._scan_directory_level`): flat plugins at the root,
+    category-namespaced plugins one level deeper, and user-overrides-bundled
+    on key collision.
+    """
+
+    @staticmethod
+    def _write_plugin(root: Path, segments: list, manifest_name: str = None) -> None:
+        plugin_dir = root
+        for seg in segments:
+            plugin_dir = plugin_dir / seg
+        plugin_dir.mkdir(parents=True, exist_ok=True)
+        manifest = {
+            "name": manifest_name or segments[-1],
+            "version": "0.1.0",
+            "description": f"Test plugin {'/'.join(segments)}",
+        }
+        (plugin_dir / "plugin.yaml").write_text(yaml.dump(manifest))
+
+    def _entries_by_key(self, tmp_path, monkeypatch) -> dict:
+        from hermes_cli import plugins_cmd
+        bundled = tmp_path / "bundled"
+        user = tmp_path / "user"
+        bundled.mkdir()
+        user.mkdir()
+        monkeypatch.setattr(
+            "hermes_cli.plugins.get_bundled_plugins_dir", lambda: bundled
+        )
+        monkeypatch.setattr(plugins_cmd, "_plugins_dir", lambda: user)
+        return bundled, user, lambda: {
+            e[0]: e for e in plugins_cmd._discover_all_plugins()
+        }
+
+    def test_flat_plugin_uses_manifest_name_as_key(self, tmp_path, monkeypatch):
+        bundled, _, discover = self._entries_by_key(tmp_path, monkeypatch)
+        self._write_plugin(bundled, ["disk-cleanup"])
+
+        entries = discover()
+        assert "disk-cleanup" in entries
+        assert entries["disk-cleanup"][3] == "bundled"
+
+    def test_category_namespaced_plugin_uses_path_derived_key(
+        self, tmp_path, monkeypatch
+    ):
+        """Regression test for the original bug — ``observability/langfuse``
+        and ``image_gen/openai`` must surface under their path-derived key,
+        not vanish because the category directory has no ``plugin.yaml``."""
+        bundled, _, discover = self._entries_by_key(tmp_path, monkeypatch)
+        # langfuse's real manifest declares ``name: langfuse`` (bare), but it
+        # lives under ``observability/`` — the key must reflect the path.
+        self._write_plugin(
+            bundled, ["observability", "langfuse"], manifest_name="langfuse"
+        )
+        self._write_plugin(bundled, ["image_gen", "openai"])
+
+        entries = discover()
+        assert "observability/langfuse" in entries
+        assert "image_gen/openai" in entries
+        # Bare manifest name must NOT leak through as a top-level key.
+        assert "langfuse" not in entries
+        assert "openai" not in entries
+
+    def test_user_overrides_bundled_on_key_collision(self, tmp_path, monkeypatch):
+        bundled, user, discover = self._entries_by_key(tmp_path, monkeypatch)
+        self._write_plugin(bundled, ["observability", "langfuse"])
+        self._write_plugin(user, ["observability", "langfuse"])
+
+        entries = discover()
+        assert entries["observability/langfuse"][3] == "user"
+
+    def test_depth_cap_skips_third_level(self, tmp_path, monkeypatch):
+        """Anything deeper than ``<root>/<category>/<plugin>/`` is ignored,
+        matching the loader's depth cap."""
+        bundled, _, discover = self._entries_by_key(tmp_path, monkeypatch)
+        # plugins/a/b/c/plugin.yaml — too deep, must NOT be discovered.
+        self._write_plugin(bundled, ["a", "b", "c"])
+
+        entries = discover()
+        assert not any(k.startswith("a/") for k in entries), entries
+
+    def test_bundled_memory_and_context_engine_skipped(self, tmp_path, monkeypatch):
+        """``plugins/memory/`` and ``plugins/context_engine/`` use their own
+        loaders; bundled entries inside them must not appear in the general
+        list (matches the pre-refactor skip set)."""
+        bundled, _, discover = self._entries_by_key(tmp_path, monkeypatch)
+        self._write_plugin(bundled, ["memory", "honcho"])
+        self._write_plugin(bundled, ["context_engine", "compressor"])
+        self._write_plugin(bundled, ["observability", "langfuse"])
+
+        entries = discover()
+        assert "memory/honcho" not in entries
+        assert "context_engine/compressor" not in entries
+        assert "observability/langfuse" in entries
+
+    def test_user_memory_subdir_is_still_scanned(self, tmp_path, monkeypatch):
+        """The memory/context_engine skip only applies to *bundled* — a user
+        plugin at ``~/.hermes/plugins/memory/<x>/`` should still be discovered
+        so the user can see what they installed."""
+        bundled, user, discover = self._entries_by_key(tmp_path, monkeypatch)
+        self._write_plugin(user, ["memory", "my-custom-store"])
+
+        entries = discover()
+        assert "memory/my-custom-store" in entries
+
+
 # ── _copy_example_files tests ─────────────────────────────────────────────────
 
 
From 3b39096904ae63a9e784b2403ad6ad27160bb2ef Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 17:18:25 -0700
Subject: [PATCH 156/218] Port from Kilo-Org/kilocode#9434: strip historical
 media after compression (#27189)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After context compression, the protected tail messages retain their
original image parts. When those include multi-MB pasted screenshots,
every subsequent API request re-ships the same base-64 blobs forever —
which can push the request past provider body-size limits and wedge the
session even though compression 'succeeded'.

Add _strip_historical_media() to agent/context_compressor.py. After the
summary is built, find the newest user message that carries an image
part and replace image parts in every earlier message with a short
text placeholder ('[Attached image — stripped after compression]').
The newest image-bearing user turn keeps its media so the model can
still analyse what the user just sent.

Handles all three multimodal shapes:
  - OpenAI chat.completions image_url
  - OpenAI Responses API input_image
  - Anthropic native {type: image, source: ...}

Includes 27 unit tests covering the helpers and the end-to-end
compress() integration, plus a manual E2E check confirming a ~4MB
two-image conversation shrinks to ~2MB after compression.
---
 agent/context_compressor.py                   | 116 ++++++++
 .../agent/test_compressor_historical_media.py | 266 ++++++++++++++++++
 2 files changed, 382 insertions(+)
 create mode 100644 tests/agent/test_compressor_historical_media.py

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index e7a14faf51b..8eadcf26ef8 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -221,6 +221,114 @@ def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
     return json.dumps(shrunken, ensure_ascii=False)
 
 
+_IMAGE_PART_TYPES = frozenset({"image_url", "input_image", "image"})
+
+
+def _is_image_part(part: Any) -> bool:
+    """True if ``part`` is a multimodal image content block.
+
+    Recognizes all three shapes the agent handles:
+      - OpenAI chat.completions: ``{"type": "image_url", "image_url": ...}``
+      - OpenAI Responses API:    ``{"type": "input_image", "image_url": "..."}``
+      - Anthropic native:        ``{"type": "image", "source": {...}}``
+    """
+    if not isinstance(part, dict):
+        return False
+    return part.get("type") in _IMAGE_PART_TYPES
+
+
+def _content_has_images(content: Any) -> bool:
+    """True if a message's ``content`` is a multimodal list with image parts."""
+    if not isinstance(content, list):
+        return False
+    return any(_is_image_part(p) for p in content)
+
+
+def _strip_images_from_content(content: Any) -> Any:
+    """Return a copy of ``content`` with every image part replaced by a
+    short text placeholder.
+
+    - String content is returned unchanged.
+    - Non-list, non-string content is returned unchanged.
+    - List content: image parts become ``{"type": "text", "text": "[Attached
+      image — stripped after compression]"}``; other parts are preserved as-is.
+
+    Input is never mutated.
+    """
+    if not isinstance(content, list):
+        return content
+    if not any(_is_image_part(p) for p in content):
+        return content
+
+    new_parts: List[Any] = []
+    for p in content:
+        if _is_image_part(p):
+            new_parts.append({
+                "type": "text",
+                "text": "[Attached image — stripped after compression]",
+            })
+        else:
+            new_parts.append(p)
+    return new_parts
+
+
+def _strip_historical_media(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Replace image parts in older messages with placeholder text.
+
+    The anchor is the *last* user message that has any image content. Every
+    message before that anchor gets its image parts replaced with a short
+    placeholder so the outgoing request stops re-shipping the same multi-MB
+    base-64 image blobs on every turn.
+
+    If no user message carries images, the list is returned unchanged.
+    If the only user message with images is the very first one (nothing
+    earlier to strip), the list is returned unchanged.
+
+    Shallow copies of touched messages only; input is never mutated.
+    Port of Kilo-Org/kilocode#9434 (adapted for the OpenAI-style message
+    shape the hermes compressor emits).
+    """
+    if not messages:
+        return messages
+
+    # Find the newest user message that carries at least one image part.
+    # We anchor on image-bearing user messages (not all user messages) so
+    # a plain text follow-up after a big-image turn still strips the old
+    # image — matching the problem kilocode#9434 set out to solve.
+    anchor = -1
+    for i in range(len(messages) - 1, -1, -1):
+        msg = messages[i]
+        if not isinstance(msg, dict):
+            continue
+        if msg.get("role") != "user":
+            continue
+        if _content_has_images(msg.get("content")):
+            anchor = i
+            break
+
+    if anchor <= 0:
+        # No image-bearing user message, or it's the very first message —
+        # nothing before it to strip.
+        return messages
+
+    changed = False
+    result: List[Dict[str, Any]] = []
+    for i, msg in enumerate(messages):
+        if i >= anchor or not isinstance(msg, dict):
+            result.append(msg)
+            continue
+        content = msg.get("content")
+        if not _content_has_images(content):
+            result.append(msg)
+            continue
+        new_msg = msg.copy()
+        new_msg["content"] = _strip_images_from_content(content)
+        result.append(new_msg)
+        changed = True
+
+    return result if changed else messages
+
+
 def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
     """Create an informative 1-line summary of a tool call + result.
 
@@ -1559,6 +1667,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
         compressed = self._sanitize_tool_pairs(compressed)
 
+        # Replace image parts in all compressed messages before the newest
+        # image-bearing user turn with a short text placeholder. Without
+        # this, tail messages keep their original multi-MB base-64 image
+        # payloads forever, which can push every subsequent API request
+        # past the provider's body-size limit and wedge the session.
+        # Port of Kilo-Org/kilocode#9434.
+        compressed = _strip_historical_media(compressed)
+
         new_estimate = estimate_messages_tokens_rough(compressed)
         saved_estimate = display_tokens - new_estimate
 
diff --git a/tests/agent/test_compressor_historical_media.py b/tests/agent/test_compressor_historical_media.py
new file mode 100644
index 00000000000..3594ef9bdde
--- /dev/null
+++ b/tests/agent/test_compressor_historical_media.py
@@ -0,0 +1,266 @@
+"""Tests for post-compression historical-media stripping.
+
+Port of Kilo-Org/kilocode#9434 (adapted for OpenAI-style message lists).
+Without this pass, tail messages keep their original multi-MB base-64 image
+payloads after context compression, and every subsequent request re-ships
+them — sometimes breaching provider body-size limits and wedging the
+session.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+from agent.context_compressor import (
+    ContextCompressor,
+    _content_has_images,
+    _is_image_part,
+    _strip_historical_media,
+    _strip_images_from_content,
+)
+
+
+IMG_URL = {
+    "type": "image_url",
+    "image_url": {"url": "data:image/png;base64," + ("A" * 1024)},
+}
+INPUT_IMG = {
+    "type": "input_image",
+    "image_url": "data:image/png;base64," + ("B" * 1024),
+}
+ANTHROPIC_IMG = {
+    "type": "image",
+    "source": {"type": "base64", "media_type": "image/png", "data": "C" * 1024},
+}
+TEXT = {"type": "text", "text": "hi"}
+INPUT_TEXT = {"type": "input_text", "text": "hi"}
+
+
+class TestIsImagePart:
+    def test_openai_chat_shape(self):
+        assert _is_image_part(IMG_URL) is True
+
+    def test_openai_responses_shape(self):
+        assert _is_image_part(INPUT_IMG) is True
+
+    def test_anthropic_native_shape(self):
+        assert _is_image_part(ANTHROPIC_IMG) is True
+
+    def test_text_part_is_not_image(self):
+        assert _is_image_part(TEXT) is False
+        assert _is_image_part(INPUT_TEXT) is False
+
+    def test_non_dict_rejected(self):
+        assert _is_image_part("image") is False
+        assert _is_image_part(None) is False
+        assert _is_image_part(42) is False
+
+
+class TestContentHasImages:
+    def test_string_content(self):
+        assert _content_has_images("a string") is False
+
+    def test_empty_list(self):
+        assert _content_has_images([]) is False
+
+    def test_text_only_list(self):
+        assert _content_has_images([TEXT, TEXT]) is False
+
+    def test_list_with_image(self):
+        assert _content_has_images([TEXT, IMG_URL]) is True
+
+    def test_none(self):
+        assert _content_has_images(None) is False
+
+
+class TestStripImagesFromContent:
+    def test_string_passthrough(self):
+        assert _strip_images_from_content("hello") == "hello"
+
+    def test_none_passthrough(self):
+        assert _strip_images_from_content(None) is None
+
+    def test_text_only_passthrough(self):
+        parts = [TEXT, {"type": "text", "text": "world"}]
+        assert _strip_images_from_content(parts) == parts
+
+    def test_replaces_image_with_placeholder(self):
+        parts = [TEXT, IMG_URL]
+        out = _strip_images_from_content(parts)
+        assert len(out) == 2
+        assert out[0] == TEXT
+        assert out[1] == {
+            "type": "text",
+            "text": "[Attached image — stripped after compression]",
+        }
+
+    def test_does_not_mutate_input(self):
+        parts = [IMG_URL, TEXT]
+        _ = _strip_images_from_content(parts)
+        assert parts[0] is IMG_URL  # original list untouched
+        assert parts[1] is TEXT
+
+    def test_handles_all_three_shapes(self):
+        parts = [IMG_URL, INPUT_IMG, ANTHROPIC_IMG, TEXT]
+        out = _strip_images_from_content(parts)
+        assert sum(1 for p in out if p.get("type") == "text") == 4
+        assert not any(_is_image_part(p) for p in out)
+
+
+class TestStripHistoricalMedia:
+    def test_empty_passthrough(self):
+        assert _strip_historical_media([]) == []
+
+    def test_no_images_anywhere(self):
+        msgs = [
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": "hey"},
+            {"role": "user", "content": "bye"},
+        ]
+        assert _strip_historical_media(msgs) is msgs  # identity — no copy
+
+    def test_single_image_user_only_first_message(self):
+        # Only image-bearing user is the first message — nothing before it.
+        msgs = [
+            {"role": "user", "content": [TEXT, IMG_URL]},
+            {"role": "assistant", "content": "ok"},
+        ]
+        out = _strip_historical_media(msgs)
+        assert out is msgs  # no-op
+        # Image still there.
+        assert _content_has_images(out[0]["content"])
+
+    def test_strips_older_user_image_keeps_newest(self):
+        msgs = [
+            {"role": "user", "content": [TEXT, IMG_URL]},     # old — strip
+            {"role": "assistant", "content": "looked at it"},
+            {"role": "user", "content": [TEXT, INPUT_IMG]},   # newest — keep
+        ]
+        out = _strip_historical_media(msgs)
+        assert out is not msgs  # new list
+        # First message's image was replaced
+        assert not _content_has_images(out[0]["content"])
+        # Newest user still has its image
+        assert _content_has_images(out[2]["content"])
+
+    def test_strips_assistant_and_tool_images_before_anchor(self):
+        msgs = [
+            {"role": "user", "content": [TEXT, IMG_URL]},          # old user
+            {"role": "assistant", "content": [TEXT, IMG_URL]},     # old assistant
+            {"role": "tool", "content": [TEXT, IMG_URL], "tool_call_id": "t1"},
+            {"role": "user", "content": [TEXT, IMG_URL]},          # newest user — keep
+        ]
+        out = _strip_historical_media(msgs)
+        for i in range(3):
+            assert not _content_has_images(out[i]["content"]), f"msg {i} still has image"
+        assert _content_has_images(out[3]["content"])
+
+    def test_text_only_newest_user_still_strips_older_images(self):
+        # The anchor is "newest user WITH images". If the newest user is
+        # text-only, we fall back to the previous image-bearing user turn.
+        msgs = [
+            {"role": "user", "content": [TEXT, IMG_URL]},
+            {"role": "assistant", "content": "ok"},
+            {"role": "user", "content": [TEXT, IMG_URL]},  # anchor
+            {"role": "assistant", "content": "done"},
+            {"role": "user", "content": "follow-up text only"},
+        ]
+        out = _strip_historical_media(msgs)
+        # First image-bearing user (index 0) was stripped — it was before the
+        # newest image-bearing user (index 2).
+        assert not _content_has_images(out[0]["content"])
+        # Anchor (index 2) keeps its image.
+        assert _content_has_images(out[2]["content"])
+
+    def test_no_image_bearing_user_is_noop(self):
+        msgs = [
+            {"role": "user", "content": "first"},
+            {"role": "assistant", "content": [TEXT, IMG_URL]},  # assistant image only
+            {"role": "user", "content": "second"},
+        ]
+        out = _strip_historical_media(msgs)
+        # No image-bearing user anchor → no stripping.
+        assert out is msgs
+        assert _content_has_images(out[1]["content"])
+
+    def test_does_not_mutate_input_messages(self):
+        msg0 = {"role": "user", "content": [TEXT, IMG_URL]}
+        msg1 = {"role": "user", "content": [TEXT, IMG_URL]}
+        msgs = [msg0, msg1]
+        _ = _strip_historical_media(msgs)
+        # Originals untouched
+        assert _content_has_images(msg0["content"])
+        assert _content_has_images(msg1["content"])
+
+    def test_idempotent(self):
+        msgs = [
+            {"role": "user", "content": [TEXT, IMG_URL]},
+            {"role": "assistant", "content": "k"},
+            {"role": "user", "content": [TEXT, IMG_URL]},
+        ]
+        first = _strip_historical_media(msgs)
+        second = _strip_historical_media(first)
+        # Second pass is a no-op — no images left before the anchor.
+        assert second is first
+
+    def test_non_dict_messages_pass_through(self):
+        msgs = [
+            "not-a-dict",  # shouldn't crash
+            {"role": "user", "content": [TEXT, IMG_URL]},
+            {"role": "assistant", "content": "ok"},
+            {"role": "user", "content": [TEXT, IMG_URL]},
+        ]
+        out = _strip_historical_media(msgs)
+        assert out[0] == "not-a-dict"
+        # Image-bearing user at index 1 is before the anchor (index 3) → stripped.
+        assert not _content_has_images(out[1]["content"])
+
+
+class TestCompressIntegration:
+    """Verify the stripping runs inside ContextCompressor.compress()."""
+
+    @pytest.fixture
+    def compressor(self):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            c = ContextCompressor(
+                model="test/model",
+                threshold_percent=0.50,
+                protect_first_n=1,
+                protect_last_n=2,
+                quiet_mode=True,
+            )
+            return c
+
+    def test_compress_strips_historical_images(self, compressor):
+        # Enough messages to trigger the summarize path. protect_first_n=1 +
+        # protect_last_n=2 + a middle window of at least 3 with a summary.
+        msgs = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": [TEXT, IMG_URL]},           # old image-bearing user
+            {"role": "assistant", "content": "looked at it"},
+            {"role": "user", "content": "follow-up"},
+            {"role": "assistant", "content": "ack"},
+            {"role": "user", "content": "more"},
+            {"role": "assistant", "content": "ok"},
+            {"role": "user", "content": [TEXT, IMG_URL]},           # newest image-bearing user (tail)
+            {"role": "assistant", "content": "done"},
+        ]
+        # Bypass the real LLM summary — return a stub so compress() proceeds.
+        with patch.object(compressor, "_generate_summary", return_value="SUMMARY TEXT"):
+            out = compressor.compress(msgs, current_tokens=60_000)
+
+        # Newest user turn with image should still have it (it's in the tail).
+        user_imgs = [m for m in out if m.get("role") == "user" and _content_has_images(m.get("content"))]
+        assert len(user_imgs) == 1, (
+            "Expected exactly one user message with images after compression "
+            f"(the newest one); got {len(user_imgs)}"
+        )
+        # No assistant or tool messages should carry images either.
+        for m in out:
+            if m is user_imgs[0]:
+                continue
+            assert not _content_has_images(m.get("content")), (
+                f"Stale image in {m.get('role')!r} message after compression"
+            )

From 407a11b4190d7a6ebbc6429d0481545abd86aadc Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 20:25:47 -0700
Subject: [PATCH 157/218] feat(discord): allow_any_attachment config to accept
 arbitrary file types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Discord adapter silently dropped any attachment whose extension wasn't
in the SUPPORTED_DOCUMENT_TYPES allowlist (PDF, text family, zip, office).
Users uploading .wav / .bin / other unrecognized formats saw nothing in
their conversation — the file got logged as 'Unsupported document type'
and discarded before the agent ever saw it.

Add discord.allow_any_attachment (default false) to bypass the allowlist.
When on:
  - Any file is downloaded, cached under ~/.hermes/cache/documents/, and
    surfaced as a DOCUMENT-typed event with application/octet-stream MIME
  - gateway/run.py already emits a context note with the cached path,
    auto-translated via to_agent_visible_cache_path() for Docker/Modal
    sandboxed terminals
  - File body is NOT inlined — only the path — so binary uploads don't
    blow up the context window
  - Allowlisted text formats (.txt/.md/.log) keep their 100 KiB inline
    behavior unchanged

Also adds discord.max_attachment_bytes (default 32 MiB matches the
historical hardcoded cap; 0 = unlimited) since users opting into arbitrary
types may want to raise the cap. The whole attachment is held in memory
while being cached, so unlimited carries a real memory cost.

Env overrides: DISCORD_ALLOW_ANY_ATTACHMENT, DISCORD_MAX_ATTACHMENT_BYTES.

Discord-only by deliberate scope. Telegram has hard 20 MB API limits and
Slack has its own caps — extending the same flag there is a separate
follow-up if/when requested.
---
 gateway/platforms/discord.py                  |  88 +++++++++--
 hermes_cli/config.py                          |  12 ++
 .../gateway/test_discord_document_handling.py | 145 ++++++++++++++++++
 .../docs/reference/environment-variables.md   |   2 +
 website/docs/user-guide/messaging/discord.md  |  21 +++
 5 files changed, 258 insertions(+), 10 deletions(-)

diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index a3904630fa9..9b8285e2a36 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -3564,6 +3564,43 @@ class DiscordAdapter(BasePlatformAdapter):
             return bool(configured)
         return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in {"false", "0", "no", "off"}
 
+    def _discord_allow_any_attachment(self) -> bool:
+        """Return whether Discord attachments bypass the SUPPORTED_DOCUMENT_TYPES allowlist.
+
+        When True, any uploaded file is cached to disk and surfaced to the
+        agent as a local path so it can be inspected via terminal / read_file
+        / ffprobe / etc. Default False preserves the historical behaviour of
+        dropping unsupported types with a warning log.
+        """
+        configured = self.config.extra.get("allow_any_attachment")
+        if configured is not None:
+            if isinstance(configured, str):
+                return configured.lower() not in {"false", "0", "no", "off", ""}
+            return bool(configured)
+        return os.getenv("DISCORD_ALLOW_ANY_ATTACHMENT", "false").lower() in {"true", "1", "yes", "on"}
+
+    def _discord_max_attachment_bytes(self) -> int:
+        """Return the per-attachment byte cap. 0 means unlimited.
+
+        The whole attachment is held in memory while being written to the
+        cache, so unlimited carries a real memory cost. Default 32 MiB
+        matches the historical hardcoded value.
+        """
+        configured = self.config.extra.get("max_attachment_bytes")
+        if configured is None:
+            configured = os.getenv("DISCORD_MAX_ATTACHMENT_BYTES")
+        if configured is None or configured == "":
+            return 32 * 1024 * 1024
+        try:
+            value = int(configured)
+        except (TypeError, ValueError):
+            logger.warning(
+                "[Discord] Invalid max_attachment_bytes value %r, falling back to 32 MiB",
+                configured,
+            )
+            return 32 * 1024 * 1024
+        return max(0, value)
+
     def _discord_free_response_channels(self) -> set:
         """Return Discord channel IDs where no bot mention is required.
 
@@ -4495,6 +4532,7 @@ class DiscordAdapter(BasePlatformAdapter):
         if normalized_content.startswith("/"):
             msg_type = MessageType.COMMAND
         elif all_attachments:
+            _allow_any = self._discord_allow_any_attachment()
             # Check attachment types
             for att in all_attachments:
                 if att.content_type:
@@ -4509,9 +4547,15 @@ class DiscordAdapter(BasePlatformAdapter):
                         if att.filename:
                             _, doc_ext = os.path.splitext(att.filename)
                             doc_ext = doc_ext.lower()
-                        if doc_ext in SUPPORTED_DOCUMENT_TYPES:
+                        if doc_ext in SUPPORTED_DOCUMENT_TYPES or _allow_any:
                             msg_type = MessageType.DOCUMENT
                     break
+                elif _allow_any:
+                    # No content_type at all (rare — discord usually fills it
+                    # in). Treat as a document so downstream pipelines surface
+                    # the path to the agent.
+                    msg_type = MessageType.DOCUMENT
+                    break
 
         # When auto-threading kicked in, route responses to the new thread
         effective_channel = auto_threaded_channel or message.channel
@@ -4594,31 +4638,48 @@ class DiscordAdapter(BasePlatformAdapter):
                 if not ext and content_type:
                     mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
                     ext = mime_to_ext.get(content_type, "")
-                if ext not in SUPPORTED_DOCUMENT_TYPES:
+                allow_any_attachment = self._discord_allow_any_attachment()
+                in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES
+                if not in_allowlist and not allow_any_attachment:
                     logger.warning(
                         "[Discord] Unsupported document type '%s' (%s), skipping",
                         ext or "unknown", content_type,
                     )
                 else:
-                    MAX_DOC_BYTES = 32 * 1024 * 1024
-                    if att.size and att.size > MAX_DOC_BYTES:
+                    max_doc_bytes = self._discord_max_attachment_bytes()
+                    if max_doc_bytes and att.size and att.size > max_doc_bytes:
                         logger.warning(
-                            "[Discord] Document too large (%s bytes), skipping: %s",
-                            att.size, att.filename,
+                            "[Discord] Document too large (%s bytes > cap %s), skipping: %s",
+                            att.size, max_doc_bytes, att.filename,
                         )
                     else:
                         try:
                             raw_bytes = await self._cache_discord_document(att, ext)
                             cached_path = cache_document_from_bytes(
-                                raw_bytes, att.filename or f"document{ext}"
+                                raw_bytes, att.filename or f"document{ext or '.bin'}"
                             )
-                            doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
+                            if in_allowlist:
+                                doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
+                            else:
+                                # allow_any_attachment path: untyped file. Use the
+                                # source content_type if discord gave us one,
+                                # otherwise fall back to octet-stream so the agent
+                                # knows it's binary and reaches for terminal tools.
+                                doc_mime = (
+                                    content_type
+                                    if content_type and content_type != "unknown"
+                                    else "application/octet-stream"
+                                )
                             media_urls.append(cached_path)
                             media_types.append(doc_mime)
-                            logger.info("[Discord] Cached user document: %s", cached_path)
+                            logger.info(
+                                "[Discord] Cached user %s: %s",
+                                "document" if in_allowlist else "attachment",
+                                cached_path,
+                            )
                             # Inject text content for plain-text documents (capped at 100 KB)
                             MAX_TEXT_INJECT_BYTES = 100 * 1024
-                            if ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
+                            if in_allowlist and ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
                                 try:
                                     text_content = raw_bytes.decode("utf-8")
                                     display_name = att.filename or f"document{ext}"
@@ -4630,6 +4691,13 @@ class DiscordAdapter(BasePlatformAdapter):
                                         pending_text_injection = injection
                                 except UnicodeDecodeError:
                                     pass
+                            # NOTE: for the allow_any_attachment path we deliberately
+                            # do NOT inject a path string here. ``gateway/run.py``
+                            # already detects DOCUMENT-typed events with
+                            # ``application/octet-stream`` MIME and emits a context
+                            # note with the sandbox-translated cache path via
+                            # ``to_agent_visible_cache_path()`` (important for
+                            # Docker/Modal terminal backends).
                         except Exception as e:
                             logger.warning(
                                 "[Discord] Failed to cache document %s: %s",
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c1f68e1c88c..c41158e42ae 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1306,6 +1306,18 @@ DEFAULT_CONFIG = {
         # list_roles, member_info, search_members, fetch_messages, list_pins,
         # pin_message, unpin_message, create_thread, add_role, remove_role.
         "server_actions": "",
+        # Accept arbitrary attachment file types (not just SUPPORTED_DOCUMENT_TYPES).
+        # When True, any uploaded file is cached to disk with mime
+        # application/octet-stream and the path is surfaced to the agent so it
+        # can use terminal/read_file/etc. against it. Default False preserves
+        # the historical allowlist behaviour.
+        # Env override: DISCORD_ALLOW_ANY_ATTACHMENT.
+        "allow_any_attachment": False,
+        # Maximum bytes per attachment the gateway will cache. The whole file
+        # is held in memory while being written, so unlimited uploads carry a
+        # real memory cost. Default 32 MiB matches the historical hardcoded
+        # cap. Set to 0 for no cap. Env override: DISCORD_MAX_ATTACHMENT_BYTES.
+        "max_attachment_bytes": 33554432,
     },
 
     # WhatsApp platform settings (gateway mode)
diff --git a/tests/gateway/test_discord_document_handling.py b/tests/gateway/test_discord_document_handling.py
index d3ad137b61c..0685b69663a 100644
--- a/tests/gateway/test_discord_document_handling.py
+++ b/tests/gateway/test_discord_document_handling.py
@@ -384,3 +384,148 @@ class TestIncomingDocumentHandling:
         assert event.message_type == MessageType.PHOTO
         assert event.media_urls == ["/tmp/cached_image.png"]
         assert event.media_types == ["image/png"]
+
+
+class TestAllowAnyAttachment:
+    """Cover the discord.allow_any_attachment config flag.
+
+    With the flag off (default), unknown file types are dropped. With it on,
+    they get cached and surfaced to the agent as DOCUMENT events with
+    application/octet-stream MIME so gateway/run.py emits a path-pointing
+    context note.
+    """
+
+    @pytest.mark.asyncio
+    async def test_unknown_type_skipped_by_default(self, adapter):
+        """Default (flag off): unknown extension is dropped.
+
+        With no text + no cached media, the adapter may legitimately decline
+        to dispatch the event at all, so we don't assert on call_args here —
+        we just verify the file wasn't cached.
+        """
+        with _mock_aiohttp_download(b"should not be cached"):
+            msg = make_message([
+                make_attachment(filename="weird.xyz", content_type="application/x-custom")
+            ])
+            await adapter._handle_message(msg)
+
+        if adapter.handle_message.call_args is not None:
+            event = adapter.handle_message.call_args[0][0]
+            assert event.media_urls == []
+
+    @pytest.mark.asyncio
+    async def test_unknown_type_cached_when_flag_on(self, adapter):
+        """Flag on: unknown extension is cached as application/octet-stream."""
+        adapter.config.extra["allow_any_attachment"] = True
+
+        with _mock_aiohttp_download(b"\x00\x01\x02 binary payload"):
+            msg = make_message([
+                make_attachment(filename="weird.xyz", content_type="application/x-custom")
+            ])
+            await adapter._handle_message(msg)
+
+        event = adapter.handle_message.call_args[0][0]
+        assert len(event.media_urls) == 1
+        assert os.path.exists(event.media_urls[0])
+        # Falls back to the source content_type when we have one.
+        assert event.media_types == ["application/x-custom"]
+        assert event.message_type == MessageType.DOCUMENT
+        # We deliberately do NOT inline arbitrary bytes — run.py emits the
+        # path-pointing note based on DOCUMENT + octet-stream MIME.
+        assert "[Content of" not in (event.text or "")
+
+    @pytest.mark.asyncio
+    async def test_unknown_type_no_content_type_becomes_octet_stream(self, adapter):
+        """Flag on + no content_type from discord: MIME falls back to octet-stream."""
+        adapter.config.extra["allow_any_attachment"] = True
+
+        with _mock_aiohttp_download(b"raw bytes"):
+            msg = make_message([
+                make_attachment(filename="mystery.bin", content_type=None)
+            ])
+            await adapter._handle_message(msg)
+
+        event = adapter.handle_message.call_args[0][0]
+        assert event.message_type == MessageType.DOCUMENT
+        assert event.media_types == ["application/octet-stream"]
+
+    @pytest.mark.asyncio
+    async def test_max_attachment_bytes_caps_uploads(self, adapter):
+        """discord.max_attachment_bytes overrides the historical 32 MiB cap."""
+        adapter.config.extra["allow_any_attachment"] = True
+        adapter.config.extra["max_attachment_bytes"] = 1024  # 1 KiB
+
+        msg = make_message([
+            make_attachment(
+                filename="too_big.xyz",
+                content_type="application/x-custom",
+                size=2048,
+            )
+        ])
+        await adapter._handle_message(msg)
+
+        event = adapter.handle_message.call_args[0][0]
+        assert event.media_urls == []
+
+    @pytest.mark.asyncio
+    async def test_max_attachment_bytes_zero_means_unlimited(self, adapter):
+        """max_attachment_bytes=0 disables the size cap entirely."""
+        adapter.config.extra["allow_any_attachment"] = True
+        adapter.config.extra["max_attachment_bytes"] = 0
+
+        # 64 MiB — would normally exceed the historical 32 MiB hardcoded cap.
+        with _mock_aiohttp_download(b"x" * 16):
+            msg = make_message([
+                make_attachment(
+                    filename="huge.xyz",
+                    content_type="application/x-custom",
+                    size=64 * 1024 * 1024,
+                )
+            ])
+            await adapter._handle_message(msg)
+
+        event = adapter.handle_message.call_args[0][0]
+        assert len(event.media_urls) == 1
+
+    @pytest.mark.asyncio
+    async def test_allowlisted_doc_unchanged_when_flag_on(self, adapter):
+        """Flag on must not change handling of types already in SUPPORTED_DOCUMENT_TYPES.
+
+        A .txt should still get its content inlined (the historical behavior),
+        and the MIME should still be the canonical text/plain — not whatever
+        discord guessed.
+        """
+        adapter.config.extra["allow_any_attachment"] = True
+        file_content = b"still a text file"
+
+        with _mock_aiohttp_download(file_content):
+            msg = make_message(
+                attachments=[make_attachment(filename="notes.txt", content_type="text/plain")],
+                content="check this",
+            )
+            await adapter._handle_message(msg)
+
+        event = adapter.handle_message.call_args[0][0]
+        assert "[Content of notes.txt]:" in event.text
+        assert "still a text file" in event.text
+        assert event.media_types == ["text/plain"]
+
+    def test_helper_reads_env_fallback(self, adapter, monkeypatch):
+        """Helper falls back to DISCORD_ALLOW_ANY_ATTACHMENT env var."""
+        assert adapter._discord_allow_any_attachment() is False
+        monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")
+        assert adapter._discord_allow_any_attachment() is True
+        monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "no")
+        assert adapter._discord_allow_any_attachment() is False
+
+    def test_helper_config_overrides_env(self, adapter, monkeypatch):
+        """config.yaml setting wins over env var."""
+        monkeypatch.setenv("DISCORD_ALLOW_ANY_ATTACHMENT", "true")
+        adapter.config.extra["allow_any_attachment"] = False
+        assert adapter._discord_allow_any_attachment() is False
+
+    def test_max_bytes_helper_invalid_value_falls_back(self, adapter):
+        """Garbage in max_attachment_bytes config falls back to 32 MiB."""
+        adapter.config.extra["max_attachment_bytes"] = "not-a-number"
+        assert adapter._discord_max_attachment_bytes() == 32 * 1024 * 1024
+
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index 4866ac083ac..90aecba4412 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -258,6 +258,8 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels |
 | `DISCORD_FREE_RESPONSE_CHANNELS` | Comma-separated channel IDs where mention is not required |
 | `DISCORD_AUTO_THREAD` | Auto-thread long replies when supported |
+| `DISCORD_ALLOW_ANY_ATTACHMENT` | When `true`, accept attachments of any file type (not just the built-in PDF/text/zip/office allowlist). Unknown types are cached and surfaced to the agent as a local path so it can inspect them via `terminal` / `read_file` / `ffprobe`. Default `false`. |
+| `DISCORD_MAX_ATTACHMENT_BYTES` | Maximum bytes per attachment the gateway will cache. Default `33554432` (32 MiB). Set to `0` for no cap (attachments are held in memory while being written). |
 | `DISCORD_REACTIONS` | Enable emoji reactions on messages during processing (default: `true`) |
 | `DISCORD_IGNORED_CHANNELS` | Comma-separated channel IDs where the bot never responds |
 | `DISCORD_NO_THREAD_CHANNELS` | Comma-separated channel IDs where bot responds without auto-threading |
diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md
index 50f1641f093..5cad7a4a535 100644
--- a/website/docs/user-guide/messaging/discord.md
+++ b/website/docs/user-guide/messaging/discord.md
@@ -294,6 +294,8 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede
 | `DISCORD_ALLOW_MENTION_USERS` | No | `true` | When `true` (default), the bot can ping individual users by ID. |
 | `DISCORD_ALLOW_MENTION_REPLIED_USER` | No | `true` | When `true` (default), replying to a message pings the original author. |
 | `DISCORD_PROXY` | No | — | Proxy URL for Discord connections (HTTP, WebSocket, REST). Overrides `HTTPS_PROXY`/`ALL_PROXY`. Supports `http://`, `https://`, and `socks5://` schemes. |
+| `DISCORD_ALLOW_ANY_ATTACHMENT` | No | `false` | When `true`, the bot accepts attachments of any file type (not just the built-in PDF/text/zip/office allowlist). Unknown types are cached to disk and surfaced to the agent as a local path with `application/octet-stream` MIME so it can inspect them with `terminal` / `read_file` / `ffprobe` / etc. |
+| `DISCORD_MAX_ATTACHMENT_BYTES` | No | `33554432` | Maximum bytes per attachment the gateway will download and cache. Default 32 MiB. Set to `0` for no cap (attachments are held in memory while being written, so unlimited carries a real memory cost). |
 | `HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS` | No | `0.6` | Grace window the adapter waits before flushing a queued text chunk. Useful for smoothing streamed output. |
 | `HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS` | No | `2.0` | Delay between split chunks when a single message exceeds Discord's length limit. |
 
@@ -613,6 +615,25 @@ The Discord adapter supports native file uploads for every common media type via
 
 Discord's per-upload size limit depends on the server's boost tier (25 MB free, up to 500 MB). If Hermes gets an HTTP 413, the adapter falls back to a link pointing at the local cache path rather than failing silently.
 
+## Receiving Arbitrary File Types
+
+By default the bot caches uploads that match a built-in allowlist — images, audio, video, PDF, text/markdown/csv/log, JSON/XML/YAML/TOML, zip, docx/xlsx/pptx. Anything else (a `.wav`, a `.bin`, a custom-extension dump) gets logged as `Unsupported document type` and dropped before the agent sees it.
+
+To accept arbitrary file types, enable `discord.allow_any_attachment`:
+
+```yaml
+discord:
+  allow_any_attachment: true
+  # Optional — raise/disable the per-file size cap. Default is 32 MiB.
+  # The whole file is held in memory while being cached, so unlimited
+  # uploads carry a real memory cost.
+  max_attachment_bytes: 33554432   # bytes; 0 = unlimited
+```
+
+When the flag is on, any uploaded file is downloaded, cached under `~/.hermes/cache/documents/`, and surfaced to the agent as a `DOCUMENT`-typed message event with `application/octet-stream` MIME. The agent receives a context note pointing at the local path (auto-translated for Docker/Modal sandboxed terminals via `to_agent_visible_cache_path`) and can inspect the file with `terminal` (`ffprobe`, `unzip`, `file`, `strings`, etc.) or `read_file`. The file body is **not** inlined into the prompt — only the path — so binary uploads don't blow up the context window.
+
+Known-text formats already in the allowlist (`.txt`, `.md`, `.log`) continue to have their contents auto-injected up to 100 KiB; that behavior is unchanged when the flag is on.
+
 ## Home Channel
 
 You can designate a "home channel" where the bot sends proactive messages (such as cron job output, reminders, and notifications). There are two ways to set it:

From a1e3d7969e4b448bb56073872f00e013098206a4 Mon Sep 17 00:00:00 2001
From: Saurav0989 <sauravsejal40@gmail.com>
Date: Sun, 17 May 2026 00:51:19 +0530
Subject: [PATCH 158/218] docs: add hermes-eval to Community section

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index efe5515f4d8..25c84f61f66 100644
--- a/README.md
+++ b/README.md
@@ -183,6 +183,7 @@ scripts/run_tests.sh
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
+- 🛠️ [hermes-eval](https://github.com/Saurav0989/hermes-eval) — Skill regression testing and trajectory quality scoring. Catch skill drift before it propagates. Exports quality-filtered trajectories in Atropos RL format. GitHub Actions CI template included.
 
 ---
 

From df80bda77831abc8beec9dfdec0b9e39565b8b68 Mon Sep 17 00:00:00 2001
From: kjames2001 <62420081+kjames2001@users.noreply.github.com>
Date: Sat, 16 May 2026 12:07:09 +0200
Subject: [PATCH 159/218] docs: add Hermes MemPalace to Community plugins
 section

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 25c84f61f66..1b9e8c369c9 100644
--- a/README.md
+++ b/README.md
@@ -184,6 +184,7 @@ scripts/run_tests.sh
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
 - 🛠️ [hermes-eval](https://github.com/Saurav0989/hermes-eval) — Skill regression testing and trajectory quality scoring. Catch skill drift before it propagates. Exports quality-filtered trajectories in Atropos RL format. GitHub Actions CI template included.
+- 🧠 [Hermes MemPalace](https://github.com/kjames2001/hermes-mempalace) — Native MemPalace memory provider plugin: semantic search, knowledge graph, and diary journaling via ChromaDB.
 
 ---
 

From d5ce85c423af825bffb5333bfd433a805e29bdeb Mon Sep 17 00:00:00 2001
From: Avi Fenesh <aviarchi1994@gmail.com>
Date: Thu, 14 May 2026 20:27:09 +0300
Subject: [PATCH 160/218] docs: add computer-use-linux community MCP

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 1b9e8c369c9..b934293a8df 100644
--- a/README.md
+++ b/README.md
@@ -182,6 +182,7 @@ scripts/run_tests.sh
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
+- 🔌 [computer-use-linux](https://github.com/avifenesh/computer-use-linux) — Linux desktop-control MCP server for Hermes and other MCP hosts, with AT-SPI accessibility trees, Wayland/X11 input, screenshots, and compositor window targeting.
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
 - 🛠️ [hermes-eval](https://github.com/Saurav0989/hermes-eval) — Skill regression testing and trajectory quality scoring. Catch skill drift before it propagates. Exports quality-filtered trajectories in Atropos RL format. GitHub Actions CI template included.
 - 🧠 [Hermes MemPalace](https://github.com/kjames2001/hermes-mempalace) — Native MemPalace memory provider plugin: semantic search, knowledge graph, and diary journaling via ChromaDB.

From 31a805883b1a39826f5faf13190d37164d3ba2ad Mon Sep 17 00:00:00 2001
From: r266-tech <r2668940489@gmail.com>
Date: Sun, 17 May 2026 04:18:58 +0800
Subject: [PATCH 161/218] docs(delegation): show api_mode override in
 custom-endpoint example

---
 website/docs/user-guide/features/delegation.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/website/docs/user-guide/features/delegation.md b/website/docs/user-guide/features/delegation.md
index ec09d148f94..077e2083d7a 100644
--- a/website/docs/user-guide/features/delegation.md
+++ b/website/docs/user-guide/features/delegation.md
@@ -274,6 +274,7 @@ delegation:
   model: "qwen2.5-coder"
   base_url: "http://localhost:1234/v1"
   api_key: "local-key"
+  # api_mode: "anthropic_messages"  # Optional. Wire protocol override for base_url ("chat_completions", "codex_responses", or "anthropic_messages"). Empty = auto-detect from URL (e.g. /anthropic suffix). Set explicitly for endpoints the heuristic can't classify (Azure AI Foundry, MiniMax, Zhipu GLM, LiteLLM proxies, …).
 ```
 
 :::tip

From 86f3776a7252126d1eed6651fe1fe6668b7ed85e Mon Sep 17 00:00:00 2001
From: r266-tech <r2668940489@gmail.com>
Date: Sun, 17 May 2026 04:18:50 +0800
Subject: [PATCH 162/218] docs(delegation): document api_mode wire-protocol
 override from #26824

---
 website/docs/user-guide/configuration.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 77e5d74ad42..5ac0d8c9df2 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -1667,6 +1667,7 @@ delegation:
   # provider: "openrouter"                  # Override provider (empty = inherit parent)
   # base_url: "http://localhost:1234/v1"    # Direct OpenAI-compatible endpoint (takes precedence over provider)
   # api_key: "local-key"                    # API key for base_url (falls back to OPENAI_API_KEY)
+  # api_mode: ""                            # Wire protocol for base_url: "chat_completions", "codex_responses", or "anthropic_messages". Empty = auto-detect from URL (e.g. /anthropic suffix → anthropic_messages). Set explicitly for non-standard endpoints the heuristic can't detect.
   max_concurrent_children: 3                # Parallel children per batch (floor 1, no ceiling). Also via DELEGATION_MAX_CONCURRENT_CHILDREN env var.
   max_spawn_depth: 1                        # Delegation tree depth cap (1-3, clamped). 1 = flat (default): parent spawns leaves that cannot delegate. 2 = orchestrator children can spawn leaf grandchildren. 3 = three levels.
   orchestrator_enabled: true                # Global kill switch. When false, role="orchestrator" is ignored and every child is forced to leaf regardless of max_spawn_depth.
@@ -1676,6 +1677,8 @@ delegation:
 
 **Direct endpoint override:** If you want the obvious custom-endpoint path, set `delegation.base_url`, `delegation.api_key`, and `delegation.model`. That sends subagents directly to that OpenAI-compatible endpoint and takes precedence over `delegation.provider`. If `delegation.api_key` is omitted, Hermes falls back to `OPENAI_API_KEY` only.
 
+**Wire protocol (`api_mode`):** Hermes auto-detects the wire protocol from `delegation.base_url` (e.g. paths ending in `/anthropic` → `anthropic_messages`; Codex / native Anthropic / Kimi-coding hostnames keep their existing detection). For endpoints the heuristic can't classify — for example Azure AI Foundry, MiniMax, Zhipu GLM, or LiteLLM proxies fronting an Anthropic-shaped backend — set `delegation.api_mode` explicitly to one of `chat_completions`, `codex_responses`, or `anthropic_messages`. Leave it empty (the default) to keep auto-detection.
+
 The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `copilot`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
 
 **Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).

From 6f7292a555b425005fc80190f9480bad305c7ba4 Mon Sep 17 00:00:00 2001
From: r266-tech <r2668940489@gmail.com>
Date: Sat, 16 May 2026 00:18:31 +0800
Subject: [PATCH 163/218] docs(cron): document name-based job lookup from
 #26231

---
 website/docs/user-guide/features/cron.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md
index 9a14e6dcd1e..9772d433812 100644
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@@ -125,6 +125,10 @@ Jobs with a `workdir` run sequentially on the scheduler tick, not in the paralle
 
 You do not need to delete and recreate jobs just to change them.
 
+:::tip Job reference
+The `<job_id>` placeholder below (and in [Lifecycle actions](#lifecycle-actions)) also accepts the job's name (case-insensitive) — handy when you remember `morning-digest` but not the hex ID. An exact job ID takes precedence over name matches; if the reference is not an ID and a name matches more than one job, the command refuses and prints the candidate IDs so you can disambiguate.
+:::
+
 ### Chat
 
 ```bash

From 49bd95c43203a2264a7352acc47c35c34c8d5a65 Mon Sep 17 00:00:00 2001
From: r266-tech <r2668940489@gmail.com>
Date: Sat, 16 May 2026 04:12:36 +0800
Subject: [PATCH 164/218] docs(security): document YOLO mode visual indicators
 added in #26238

---
 website/docs/user-guide/security.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md
index 2a48deb2448..0ff53539057 100644
--- a/website/docs/user-guide/security.md
+++ b/website/docs/user-guide/security.md
@@ -64,6 +64,11 @@ The `/yolo` command is a **toggle** — each use flips the mode on or off:
 
 YOLO mode is available in both CLI and gateway sessions. Internally, it sets the `HERMES_YOLO_MODE` environment variable which is checked before every command execution.
 
+When YOLO is active, Hermes shows two persistent visual reminders so it's hard to forget that approval prompts are bypassed:
+
+- A red banner line at session start when YOLO is already active: `⚠ YOLO mode — all approval prompts bypassed`. Hidden when YOLO is off so the default banner stays uncluttered.
+- A `⚠ YOLO` fragment in the status bar across all width tiers, updated live as you toggle YOLO on or off (rich-text renderer and plain-text fallback).
+
 :::danger
 YOLO mode disables **all** dangerous command safety checks for the session — **except** the hardline blocklist (see below). Use only when you fully trust the commands being generated (e.g., well-tested automation scripts in disposable environments).
 :::

From c741eacd0c1d81ec8db93bfe7e4cf8296d2f3d55 Mon Sep 17 00:00:00 2001
From: BROCCOLO1D <279959838+BROCCOLO1D@users.noreply.github.com>
Date: Sat, 16 May 2026 20:31:34 -0700
Subject: [PATCH 165/218] docs(spotify): document Home Assistant speaker
 routing

---
 website/docs/user-guide/features/spotify.md | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/website/docs/user-guide/features/spotify.md b/website/docs/user-guide/features/spotify.md
index 5e57688e48f..e9b8f3748a1 100644
--- a/website/docs/user-guide/features/spotify.md
+++ b/website/docs/user-guide/features/spotify.md
@@ -12,7 +12,7 @@ Unlike Hermes' built-in OAuth integrations (Google, GitHub Copilot, Codex), Spot
 
 ## Setup
 
-### One-shot: `hermes tools`
+### One-shot: `hermes tools` or first-run setup
 
 The fastest path. Run:
 
@@ -20,7 +20,9 @@ The fastest path. Run:
 hermes tools
 ```
 
-Scroll to `🎵 Spotify`, press space to toggle it on, then `s` to save. Hermes drops you straight into the OAuth flow — if you don't have a Spotify app yet, it walks you through creating one inline. Once you finish, the toolset is enabled AND authenticated in one pass.
+Scroll to `🎵 Spotify`, press space to toggle it on, then `s` to save. The same toggle is also available during the first-run `hermes setup` / `hermes setup tools` flow. Spotify stays opt-in, so enabling it there runs the same provider-aware configuration as `hermes tools`.
+
+Hermes drops you straight into the OAuth flow — if you don't have a Spotify app yet, it walks you through creating one inline. Once you finish, the toolset is enabled AND authenticated in one pass.
 
 If you prefer to do the steps separately (or you're re-authing later), use the two-step flow below.
 
@@ -125,6 +127,12 @@ Control and inspect playback, plus fetch recently played history.
 | `list` | Every Spotify Connect device visible to your account |
 | `transfer` | Move playback to `device_id`. Optional `play: true` starts playback on transfer |
 
+### Home Assistant-managed speakers
+
+If Home Assistant manages speakers that already support Spotify Connect (for example Sonos, Echo, Nest, or other Connect-capable speakers), they appear in `spotify_devices list` automatically whenever Spotify can see them. Hermes does not need a Home Assistant ↔ Spotify bridge for this path — Spotify handles the device routing natively.
+
+Ask Hermes to transfer playback by the speaker's display name (for example, “transfer Spotify to the kitchen speaker”), or call `spotify_devices list` and pass the exact `device_id` to `spotify_devices transfer` when scripting. If the speaker is missing, open the Spotify app or the speaker's Spotify integration once so Spotify registers it as an active Connect target.
+
 #### `spotify_queue`
 | Action | Purpose | Premium? |
 |--------|---------|----------|

From 903ac23bc879cbeb9d70b1941176a91cc736b643 Mon Sep 17 00:00:00 2001
From: aqilaziz <gonzes7@gmail.com>
Date: Fri, 15 May 2026 18:44:04 +0700
Subject: [PATCH 166/218] docs(dashboard): clarify chat tab tui flag

---
 website/docs/reference/cli-commands.md            | 9 ++++++++-
 website/docs/user-guide/features/web-dashboard.md | 5 +++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 3b5b7d2e925..4cfc80191f1 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -1106,13 +1106,17 @@ hermes claw migrate --source /home/user/old-openclaw
 hermes dashboard [options]
 ```
 
-Launch the web dashboard — a browser-based UI for managing configuration, API keys, and monitoring sessions. Requires `pip install hermes-agent[web]` (FastAPI + Uvicorn). See [Web Dashboard](/docs/user-guide/features/web-dashboard) for full documentation.
+Launch the web dashboard — a browser-based UI for managing configuration, API keys, and monitoring sessions. Requires `pip install hermes-agent[web]` (FastAPI + Uvicorn). The embedded browser Chat tab requires `--tui` plus the `pty` extra. See [Web Dashboard](/docs/user-guide/features/web-dashboard) for full documentation.
 
 | Option | Default | Description |
 |--------|---------|-------------|
 | `--port` | `9119` | Port to run the web server on |
 | `--host` | `127.0.0.1` | Bind address |
 | `--no-open` | — | Don't auto-open the browser |
+| `--tui` | off | Enable the in-browser Chat tab by running `hermes --tui` behind a PTY/WebSocket bridge. Requires `pip install 'hermes-agent[web,pty]'` and a POSIX PTY environment such as Linux, macOS, or WSL2. |
+| `--insecure` | off | Allow binding to non-localhost hosts. Exposes dashboard credentials on the network; use only behind trusted network controls. |
+| `--stop` | — | Stop running `hermes dashboard` processes and exit. |
+| `--status` | — | List running `hermes dashboard` processes and exit. |
 
 ```bash
 # Default — opens browser to http://127.0.0.1:9119
@@ -1120,6 +1124,9 @@ hermes dashboard
 
 # Custom port, no browser
 hermes dashboard --port 8080 --no-open
+
+# Enable the browser Chat tab
+hermes dashboard --tui
 ```
 
 ## `hermes profile`
diff --git a/website/docs/user-guide/features/web-dashboard.md b/website/docs/user-guide/features/web-dashboard.md
index e7968498586..d7201cbbe08 100644
--- a/website/docs/user-guide/features/web-dashboard.md
+++ b/website/docs/user-guide/features/web-dashboard.md
@@ -35,6 +35,9 @@ hermes dashboard --host 0.0.0.0
 
 # Start without opening browser
 hermes dashboard --no-open
+
+# Enable the in-browser Chat tab
+hermes dashboard --tui
 ```
 
 ## Prerequisites
@@ -49,6 +52,8 @@ The `web` extra pulls in FastAPI/Uvicorn; `pty` pulls in `ptyprocess` (POSIX) or
 
 When you run `hermes dashboard` without the dependencies, it will tell you what to install. If the frontend hasn't been built yet and `npm` is available, it builds automatically on first launch.
 
+The Chat tab is intentionally off for a plain `hermes dashboard` launch. Start the dashboard with `hermes dashboard --tui` or set `HERMES_DASHBOARD_TUI=1` when you want the embedded browser chat pane.
+
 ## Pages
 
 ### Status

From 21078ebcea6dd870835080fdc76a40284418c921 Mon Sep 17 00:00:00 2001
From: PaTTeeL <9150277+PaTTeeL@users.noreply.github.com>
Date: Sat, 16 May 2026 20:31:34 -0700
Subject: [PATCH 167/218] fix(fallback): forward custom_providers to fallback
 model context-length detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The same root cause as the auxiliary compression fix (commit 7becb19):
get_model_context_length() is called without custom_providers, so per-model
context_length overrides are silently skipped.  The fallback activation path
(_try_activate_fallback) had the same missing parameter.

When the agent switches to a fallback provider, the fallback model would use
the models.dev value (e.g. 204800 for NVIDIA NIM minimax-m2.7) instead of
the user-configured one in custom_providers (e.g. 196608) — a subtle
discrepancy that could cause the fallback model to run with an incorrect
context window, leading to truncated messages or failed API requests when
the model does not support the detected length.

Fix: pass self._custom_providers to get_model_context_length() so the
fallback path sees the same per-model overrides as the main model path.
---
 run_agent.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/run_agent.py b/run_agent.py
index 2931c4fa349..e2bfd7ed1e4 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9187,6 +9187,7 @@ class AIAgent:
                     self.model, base_url=self.base_url,
                     api_key=self.api_key, provider=self.provider,
                     config_context_length=getattr(self, "_config_context_length", None),
+                    custom_providers=self._custom_providers,
                 )
                 self.context_compressor.update_model(
                     model=self.model,

From 7244116b687f6e5ff5e869c99cdbb1b09c822799 Mon Sep 17 00:00:00 2001
From: Matthew Lai <m@matthewlai.ca>
Date: Thu, 14 May 2026 00:43:49 +0100
Subject: [PATCH 168/218] feat(agent): Added gemma 4 to reasoning allowlist

---
 run_agent.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/run_agent.py b/run_agent.py
index e2bfd7ed1e4..9324b1c2901 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -10123,6 +10123,7 @@ class AIAgent:
             "openai/",
             "x-ai/",
             "google/gemini-2",
+            "google/gemma-4",
             "qwen/qwen3",
             "tencent/hy3-preview",
             "xiaomi/",

From 96b7f3da45931f2f0e8ce6eb880da24d2c44ac7e Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 20:31:55 -0700
Subject: [PATCH 169/218] chore(release): AUTHOR_MAP entries for batch salvage
 contributors

Adds release-note attribution mappings for:
- @Saurav0989 (PR #27071)
- @avifenesh (PR #25902)
- @BROCCOLO1D (PR #26796)
- @matthewlai (PR #25293)
---
 scripts/release.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 5d4cb3eb82f..ee4c948f643 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1086,6 +1086,14 @@ AUTHOR_MAP = {
     "nightcityblade@gmail.com": "nightcityblade",  # PR #24138 (docs voice/tts table)
     "pol.kuijken@gmail.com": "polkn",  # PR #6136 salvage (skill_view collision refusal)
     "robin@soal.org": "rewbs",
+    # batch salvage (May 2026 LHF run)
+    "sauravsejal40@gmail.com": "Saurav0989",  # PR #27071 (docs: hermes-eval community link)
+    "220110965+Saurav0989@users.noreply.github.com": "Saurav0989",
+    "aviarchi1994@gmail.com": "avifenesh",  # PR #25902 (docs: computer-use-linux MCP)
+    "55848801+avifenesh@users.noreply.github.com": "avifenesh",
+    "279959838+BROCCOLO1D@users.noreply.github.com": "BROCCOLO1D",  # PR #26796 (docs: spotify + HA)
+    "m@matthewlai.ca": "matthewlai",  # PR #25293 (feat: gemma 4 reasoning allowlist)
+    "4296245+matthewlai@users.noreply.github.com": "matthewlai",
 }
 
 
From 973f27e95631aaecbda5e32e3fa9e5d7f6a2e1d3 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 20:33:38 -0700
Subject: [PATCH 170/218] fix(run_agent): isolate background review fork from
 external memory plugins (#27190)

Pass skip_memory=True to the AIAgent constructor used by
_spawn_background_review() so the review fork's __init__ no longer
rebuilds a _memory_manager wired to honcho / mem0 / supermemory /
etc. under the parent's session_id.

Before this change, the review fork ingested its harness prompt
(the 'Review the conversation above and update the skill library...'
text) into the user's real memory namespace via three sites in
run_conversation():
  - on_turn_start(turn_count, prompt)      cadence + turn-message
  - prefetch_all(prompt)                   recall query
  - sync_all(prompt, review_output, ...)   harness + review output
                                           recorded as a
                                           (user, assistant) pair

Built-in MEMORY.md / USER.md state is still rebound from the parent
right after construction, so memory(action='add') writes from the
review continue to land on disk; only the external-plugin side
effects are removed.

Reported by @Utku.
---
 run_agent.py                              | 16 ++++++++
 tests/run_agent/test_background_review.py | 48 +++++++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 9324b1c2901..b10a68cf9d0 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4386,6 +4386,21 @@ class AIAgent:
                     # owns the loop and the agent-loop tools dispatch.
                     if _parent_api_mode == "codex_app_server":
                         _parent_api_mode = "codex_responses"
+                    # skip_memory=True keeps the review fork from
+                    # touching external memory plugins (honcho, mem0,
+                    # supermemory, etc.).  Without it, the fork's
+                    # __init__ rebuilds its own _memory_manager from
+                    # config, scoped to the parent's session_id, and
+                    # run_conversation() then leaks the harness prompt
+                    # into the user's real memory namespace via three
+                    # ingestion sites: on_turn_start (cadence + turn
+                    # message), prefetch_all (recall query), and
+                    # sync_all (harness prompt + review output recorded
+                    # as a (user, assistant) turn pair).  Built-in
+                    # MEMORY.md / USER.md state is re-bound from the
+                    # parent below so memory(action="add") writes from
+                    # the review still land on disk; the review just
+                    # has zero side effects on external providers.
                     review_agent = AIAgent(
                         model=self.model,
                         max_iterations=16,
@@ -4397,6 +4412,7 @@ class AIAgent:
                         api_key=_parent_runtime.get("api_key") or None,
                         credential_pool=getattr(self, "_credential_pool", None),
                         parent_session_id=self.session_id,
+                        skip_memory=True,
                     )
                     review_agent._memory_write_origin = "background_review"
                     review_agent._memory_write_context = "background_review"
diff --git a/tests/run_agent/test_background_review.py b/tests/run_agent/test_background_review.py
index 2e79b10b346..89626f857d5 100644
--- a/tests/run_agent/test_background_review.py
+++ b/tests/run_agent/test_background_review.py
@@ -193,3 +193,51 @@ def test_background_review_summary_is_attributed_to_self_improvement_loop(monkey
     assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), (
         captured_bg_callback[0]
     )
+
+
+def test_background_review_fork_skips_external_memory_plugins(monkeypatch):
+    """The background review fork must NOT touch external memory plugins.
+
+    Without skip_memory=True on the fork constructor, AIAgent.__init__
+    rebuilds its own _memory_manager from config, scoped to the parent's
+    session_id.  The review fork's run_conversation() then leaks the
+    harness prompt into the user's real memory namespace via three
+    ingestion sites: on_turn_start (cadence + turn message),
+    prefetch_all (recall query), and sync_all (harness prompt + review
+    output recorded as a (user, assistant) turn pair).  The fix is a
+    single kwarg on the fork constructor — this test guards it.
+    """
+    captured_kwargs: dict = {}
+
+    class FakeReviewAgent:
+        def __init__(self, **kwargs):
+            captured_kwargs.update(kwargs)
+            self._session_messages = []
+
+        def run_conversation(self, **kwargs):
+            pass
+
+        def shutdown_memory_provider(self):
+            pass
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
+    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
+
+    agent = _bare_agent()
+
+    AIAgent._spawn_background_review(
+        agent,
+        messages_snapshot=[{"role": "user", "content": "hello"}],
+        review_memory=True,
+    )
+
+    assert captured_kwargs.get("skip_memory") is True, (
+        "Background review fork must be constructed with skip_memory=True "
+        "so AIAgent.__init__ does not rebuild a _memory_manager wired to "
+        "external plugins (honcho, mem0, supermemory, ...).  Without this "
+        "the fork leaks harness prompts into the user's real memory "
+        "namespace via on_turn_start / prefetch_all / sync_all."
+    )

From 290bf93104652bf6acaf50151f0ddac54cb69fde Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 16 May 2026 22:51:51 -0500
Subject: [PATCH 171/218] fix(tui): harden Terminal.app render behavior

Avoid Terminal.app paint corruption by disabling fast-echo in that terminal, sanitizing non-SGR control sequences before ANSI rendering, and defaulting Apple Terminal back to the safer 256-color path unless truecolor is explicitly requested.
---
 hermes_cli/main.py                            | 24 ++++++-
 tests/hermes_cli/test_tui_resume_flow.py      | 28 ++++++++
 ui-tui/src/__tests__/forceTruecolor.test.ts   | 68 +++++++++++++++++++
 ui-tui/src/__tests__/text.test.ts             | 24 +++++++
 .../src/__tests__/textInputFastEcho.test.ts   | 13 +++-
 ui-tui/src/components/messageLine.tsx         |  8 ++-
 ui-tui/src/components/textInput.tsx           |  8 ++-
 ui-tui/src/lib/forceTruecolor.ts              | 30 ++++++++
 ui-tui/src/lib/text.ts                        | 21 +++++-
 9 files changed, 214 insertions(+), 10 deletions(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index bd8fe6c5cff..662bc57b78d 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1080,7 +1080,7 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
             return [node, str(bundled)], bundled.parent
 
     # 2. Normal flow: npm install if needed, always esbuild, then node dist/entry.js.
-    #    --dev flow: npm install if needed, then tsx src/entry.tsx (no build).
+    #    --dev flow: npm install if needed, then tsx src/entry.tsx.
     if _tui_need_npm_install(tui_dir):
         npm = _node_bin("npm")
         if not os.environ.get("HERMES_QUIET"):
@@ -1102,10 +1102,30 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
             sys.exit(1)
 
     if tui_dev:
+        # Keep the local @hermes/ink package exports in sync with source.
+        # --dev runs src/entry.tsx directly, but @hermes/ink resolves through
+        # packages/hermes-ink/dist/entry-exports.js. If that dist bundle is
+        # stale after a pull, newer hooks/components can exist in src while
+        # being missing at runtime (e.g. useCursorAdvance). Prebuild it here.
+        npm = _node_bin("npm")
+        ink_dir = tui_dir / "packages" / "hermes-ink"
+        result = subprocess.run(
+            [npm, "run", "build"],
+            cwd=str(ink_dir),
+            capture_output=True,
+            text=True,
+        )
+        if result.returncode != 0:
+            combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
+            preview = "\n".join(combined.splitlines()[-30:])
+            print("TUI dev prebuild failed.")
+            if preview:
+                print(preview)
+            sys.exit(1)
+
         tsx = tui_dir / "node_modules" / ".bin" / "tsx"
         if tsx.exists():
             return [str(tsx), "src/entry.tsx"], tui_dir
-        npm = _node_bin("npm")
         return [npm, "start"], tui_dir
 
     # Always rebuild — esbuild is fast and this avoids staleness-edge-case bugs.
diff --git a/tests/hermes_cli/test_tui_resume_flow.py b/tests/hermes_cli/test_tui_resume_flow.py
index fe6f0358069..25e478ccd2c 100644
--- a/tests/hermes_cli/test_tui_resume_flow.py
+++ b/tests/hermes_cli/test_tui_resume_flow.py
@@ -523,6 +523,34 @@ def test_launch_tui_exports_model_provider_and_toolsets(monkeypatch, main_mod):
     assert env["NODE_ENV"] == "production"
 
 
+def test_make_tui_argv_dev_prebuilds_hermes_ink(monkeypatch, main_mod, tmp_path):
+    tui_dir = tmp_path / "ui-tui"
+    tsx = tui_dir / "node_modules" / ".bin" / "tsx"
+    ink_dir = tui_dir / "packages" / "hermes-ink"
+    tsx.parent.mkdir(parents=True)
+    ink_dir.mkdir(parents=True)
+    tsx.write_text("#!/usr/bin/env node\n", encoding="utf-8")
+
+    monkeypatch.setattr(main_mod, "_ensure_tui_node", lambda: None)
+    monkeypatch.setattr(main_mod, "_tui_need_npm_install", lambda _tui_dir: False)
+    monkeypatch.delenv("HERMES_TUI_DIR", raising=False)
+    monkeypatch.setattr(main_mod.shutil, "which", lambda bin_name: f"/usr/bin/{bin_name}")
+
+    calls = []
+
+    def fake_run(cmd, cwd=None, **_kwargs):
+        calls.append((cmd, cwd))
+        return types.SimpleNamespace(returncode=0, stdout="", stderr="")
+
+    monkeypatch.setattr(main_mod.subprocess, "run", fake_run)
+
+    argv, cwd = main_mod._make_tui_argv(tui_dir, tui_dev=True)
+
+    assert argv == [str(tsx), "src/entry.tsx"]
+    assert cwd == tui_dir
+    assert calls == [(["/usr/bin/npm", "run", "build"], str(ink_dir))]
+
+
 def test_print_tui_exit_summary_includes_resume_and_token_totals(monkeypatch, capsys):
     import hermes_cli.main as main_mod
 
diff --git a/ui-tui/src/__tests__/forceTruecolor.test.ts b/ui-tui/src/__tests__/forceTruecolor.test.ts
index 4d978328152..03d30fa69b7 100644
--- a/ui-tui/src/__tests__/forceTruecolor.test.ts
+++ b/ui-tui/src/__tests__/forceTruecolor.test.ts
@@ -52,6 +52,50 @@ describe('forceTruecolor', () => {
     )
   })
 
+  it('downgrades Apple Terminal when truecolor is only advertised by env', async () => {
+    await withCleanEnv(
+      () => {
+        process.env.TERM_PROGRAM = 'Apple_Terminal'
+        process.env.COLORTERM = 'truecolor'
+        process.env.FORCE_COLOR = '3'
+      },
+      async () => {
+        const mod = await import('../lib/forceTruecolor.js?t=downgrade-' + importId++)
+        expect(
+          mod.shouldDowngradeAppleTerminalTruecolor({
+            TERM_PROGRAM: 'Apple_Terminal',
+            COLORTERM: 'truecolor',
+            FORCE_COLOR: '3'
+          } as NodeJS.ProcessEnv)
+        ).toBe(true)
+        expect(process.env.COLORTERM).toBeUndefined()
+        expect(process.env.FORCE_COLOR).toBeUndefined()
+      }
+    )
+  })
+
+  it('keeps non-Apple terminals untouched when they advertise truecolor', async () => {
+    await withCleanEnv(
+      () => {
+        process.env.TERM_PROGRAM = 'vscode'
+        process.env.COLORTERM = 'truecolor'
+        process.env.FORCE_COLOR = '3'
+      },
+      async () => {
+        const mod = await import('../lib/forceTruecolor.js?t=keep-non-apple-' + importId++)
+        expect(
+          mod.shouldDowngradeAppleTerminalTruecolor({
+            TERM_PROGRAM: 'vscode',
+            COLORTERM: 'truecolor',
+            FORCE_COLOR: '3'
+          } as NodeJS.ProcessEnv)
+        ).toBe(false)
+        expect(process.env.COLORTERM).toBe('truecolor')
+        expect(process.env.FORCE_COLOR).toBe('3')
+      }
+    )
+  })
+
   it('sets COLORTERM=truecolor and FORCE_COLOR=3 when explicitly enabled', async () => {
     await withCleanEnv(
       () => {
@@ -79,6 +123,30 @@ describe('forceTruecolor', () => {
     )
   })
 
+  it('lets explicit opt-in keep Apple truecolor advertisement', async () => {
+    await withCleanEnv(
+      () => {
+        process.env.TERM_PROGRAM = 'Apple_Terminal'
+        process.env.COLORTERM = 'truecolor'
+        process.env.FORCE_COLOR = '3'
+        process.env.HERMES_TUI_TRUECOLOR = '1'
+      },
+      async () => {
+        const mod = await import('../lib/forceTruecolor.js?t=apple-explicit-on-' + importId++)
+        expect(
+          mod.shouldDowngradeAppleTerminalTruecolor({
+            TERM_PROGRAM: 'Apple_Terminal',
+            COLORTERM: 'truecolor',
+            FORCE_COLOR: '3',
+            HERMES_TUI_TRUECOLOR: '1'
+          } as NodeJS.ProcessEnv)
+        ).toBe(false)
+        expect(process.env.COLORTERM).toBe('truecolor')
+        expect(process.env.FORCE_COLOR).toBe('3')
+      }
+    )
+  })
+
   it('respects NO_COLOR', async () => {
     await withCleanEnv(
       () => {
diff --git a/ui-tui/src/__tests__/text.test.ts b/ui-tui/src/__tests__/text.test.ts
index 92afd1513df..ffd8f849da2 100644
--- a/ui-tui/src/__tests__/text.test.ts
+++ b/ui-tui/src/__tests__/text.test.ts
@@ -8,12 +8,15 @@ import {
   estimateRows,
   estimateTokensRough,
   fmtK,
+  hasAnsi,
   isToolTrailResultLine,
   lastCotTrailIndex,
   parseToolTrailResultLine,
   pasteTokenLabel,
+  sanitizeAnsiForRender,
   sameToolTrailGroup,
   splitToolDuration,
+  stripAnsi,
   thinkingPreview
 } from '../lib/text.js'
 
@@ -84,6 +87,27 @@ describe('estimateTokensRough', () => {
   })
 })
 
+describe('ANSI sanitizers', () => {
+  const ESC = String.fromCharCode(27)
+  const BEL = String.fromCharCode(7)
+
+  it('strips CSI/OSC/control bytes from plain previews', () => {
+    const sample = `A${ESC}[31mB${ESC}[39m${ESC}[2J${ESC}]0;title${BEL}C${ESC}[?25lD`
+
+    expect(stripAnsi(sample)).toBe('ABCD')
+  })
+
+  it('keeps SGR color spans but removes cursor controls for Ansi rendering', () => {
+    const sample = `A${ESC}[31mB${ESC}[39m${ESC}[2J${ESC}]0;title${BEL}${ESC}[?25lC`
+
+    expect(sanitizeAnsiForRender(sample)).toBe(`A${ESC}[31mB${ESC}[39mC`)
+  })
+
+  it('detects non-CSI escape prefixes too', () => {
+    expect(hasAnsi(`ok${ESC}Ppayload${ESC}\\`)).toBe(true)
+  })
+})
+
 describe('thinkingPreview', () => {
   it('adds paragraph breaks before markdown thinking headings', () => {
     const raw =
diff --git a/ui-tui/src/__tests__/textInputFastEcho.test.ts b/ui-tui/src/__tests__/textInputFastEcho.test.ts
index 2e08111ffb4..83b5c511940 100644
--- a/ui-tui/src/__tests__/textInputFastEcho.test.ts
+++ b/ui-tui/src/__tests__/textInputFastEcho.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest'
 
-import { canFastAppendShape, canFastBackspaceShape } from '../components/textInput.js'
+import { canFastAppendShape, canFastBackspaceShape, supportsFastEchoTerminal } from '../components/textInput.js'
 
 // The fast-echo path bypasses Ink and writes characters directly to stdout
 // for the common case of typing plain English at the end of the line. These
@@ -172,3 +172,14 @@ describe('canFastBackspaceShape', () => {
     expect(canFastBackspaceShape('hello ', 'hello '.length)).toBe(true)
   })
 })
+
+describe('supportsFastEchoTerminal', () => {
+  it('disables fast-echo in Apple Terminal', () => {
+    expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'Apple_Terminal' } as NodeJS.ProcessEnv)).toBe(false)
+  })
+
+  it('keeps fast-echo enabled in VS Code and unknown terminals', () => {
+    expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv)).toBe(true)
+    expect(supportsFastEchoTerminal({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
+  })
+})
diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx
index 238b551ae97..f44f1813804 100644
--- a/ui-tui/src/components/messageLine.tsx
+++ b/ui-tui/src/components/messageLine.tsx
@@ -12,6 +12,7 @@ import {
   compactPreview,
   hasAnsi,
   isPasteBackedText,
+  sanitizeAnsiForRender,
   stripAnsi
 } from '../lib/text.js'
 import type { Theme } from '../theme.js'
@@ -85,13 +86,14 @@ export const MessageLine = memo(function MessageLine({
   if (msg.role === 'tool') {
     const maxChars = Math.max(24, cols - 14)
     const stripped = hasAnsi(msg.text) ? stripAnsi(msg.text) : msg.text
+    const safeAnsi = hasAnsi(msg.text) ? sanitizeAnsiForRender(msg.text) : msg.text
     const preview = compactPreview(stripped, maxChars) || '(empty tool result)'
 
     return (
       <Box alignSelf="flex-start" borderColor={t.color.muted} borderStyle="round" marginLeft={3} paddingX={1}>
         {hasAnsi(msg.text) ? (
           <Text wrap="truncate-end">
-            <Ansi>{msg.text}</Ansi>
+            <Ansi>{safeAnsi}</Ansi>
           </Text>
         ) : (
           <Text color={t.color.muted} wrap="truncate-end">
@@ -129,13 +131,13 @@ export const MessageLine = memo(function MessageLine({
               {msg.text.length.toLocaleString()} chars
             </Text>
           </Box>
-          {systemOpen && <Ansi>{msg.text}</Ansi>}
+          {systemOpen && <Ansi>{sanitizeAnsiForRender(msg.text)}</Ansi>}
         </Box>
       )
     }
 
     if (msg.role !== 'user' && hasAnsi(msg.text)) {
-      return <Ansi>{msg.text}</Ansi>
+      return <Ansi>{sanitizeAnsiForRender(msg.text)}</Ansi>
     }
 
     if (msg.role === 'assistant') {
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index b3c79357368..ace2f479dc1 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -283,6 +283,12 @@ export function canFastBackspaceShape(current: string, cursor: number, columns?:
   return ASCII_PRINTABLE_RE.test(removed)
 }
 
+export function supportsFastEchoTerminal(env: NodeJS.ProcessEnv = process.env): boolean {
+  // Terminal.app still shows paint/cursor artifacts under the fast-echo
+  // bypass path. Fall back to the normal Ink render path there.
+  return (env.TERM_PROGRAM ?? '').trim() !== 'Apple_Terminal'
+}
+
 function renderWithCursor(value: string, cursor: number) {
   const pos = Math.max(0, Math.min(cursor, value.length))
 
@@ -559,7 +565,7 @@ export function TextInput({
     }, 16)
   }
 
-  const canFastEchoBase = () => focus && termFocus && !selected && !mask && !!stdout?.isTTY
+  const canFastEchoBase = () => supportsFastEchoTerminal() && focus && termFocus && !selected && !mask && !!stdout?.isTTY
 
   const canFastAppend = (current: string, cursor: number, text: string) =>
     canFastEchoBase() && canFastAppendShape(current, cursor, text, columns, lineWidthRef.current)
diff --git a/ui-tui/src/lib/forceTruecolor.ts b/ui-tui/src/lib/forceTruecolor.ts
index 25de7b2dc34..cd63154e040 100644
--- a/ui-tui/src/lib/forceTruecolor.ts
+++ b/ui-tui/src/lib/forceTruecolor.ts
@@ -19,12 +19,42 @@ export function shouldForceTruecolor(env: NodeJS.ProcessEnv = process.env): bool
   return TRUE_RE.test(override)
 }
 
+const isAppleTerminal = (env: NodeJS.ProcessEnv = process.env) => (env.TERM_PROGRAM ?? '').trim() === 'Apple_Terminal'
+
+const isAdvertisedTruecolor = (env: NodeJS.ProcessEnv = process.env) => {
+  const colorTerm = (env.COLORTERM ?? '').trim().toLowerCase()
+  const forceColor = (env.FORCE_COLOR ?? '').trim()
+
+  return colorTerm === 'truecolor' || colorTerm === '24bit' || forceColor === '3'
+}
+
+export function shouldDowngradeAppleTerminalTruecolor(env: NodeJS.ProcessEnv = process.env): boolean {
+  if (!isAppleTerminal(env)) {
+    return false
+  }
+
+  if (shouldForceTruecolor(env)) {
+    return false
+  }
+
+  return isAdvertisedTruecolor(env)
+}
+
 if (shouldForceTruecolor()) {
   if (!process.env.COLORTERM) {
     process.env.COLORTERM = 'truecolor'
   }
 
   process.env.FORCE_COLOR = '3'
+} else if (shouldDowngradeAppleTerminalTruecolor()) {
+  // Terminal.app may advertise truecolor even when RGB SGR paths render
+  // incorrectly. Keep Hermes on the safer TERM-driven 256-color path unless
+  // users explicitly opt back in via HERMES_TUI_TRUECOLOR=1.
+  delete process.env.COLORTERM
+
+  if ((process.env.FORCE_COLOR ?? '').trim() === '3') {
+    delete process.env.FORCE_COLOR
+  }
 }
 
 export {}
diff --git a/ui-tui/src/lib/text.ts b/ui-tui/src/lib/text.ts
index 744046f6be4..46dd1f67e15 100644
--- a/ui-tui/src/lib/text.ts
+++ b/ui-tui/src/lib/text.ts
@@ -9,12 +9,27 @@ import { VERBS } from '../content/verbs.js'
 import type { ThinkingMode } from '../types.js'
 
 const ESC = String.fromCharCode(27)
-const ANSI_RE = new RegExp(`${ESC}\\[[0-9;]*m`, 'g')
+const BEL = String.fromCharCode(7)
+const ANSI_CSI_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*[@-~]`, 'g')
+const ANSI_CSI_WITH_CMD_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*([@-~])`, 'g')
+const ANSI_OSC_RE = new RegExp(`${ESC}\\][\\s\\S]*?(?:${BEL}|${ESC}\\\\)`, 'g')
+const ANSI_STRING_RE = new RegExp(`${ESC}[PX^_][\\s\\S]*?(?:${BEL}|${ESC}\\\\)`, 'g')
+const ANSI_STRAY_ESC_RE = new RegExp(`${ESC}(?!\\[)[\\s\\S]?`, 'g')
+const CONTROL_RE = /[\x00-\x08\x0B\x0C\x0E-\x1A\x1C-\x1F\x7F]/g
 const WS_RE = /\s+/g
 
-export const stripAnsi = (s: string) => s.replace(ANSI_RE, '')
+export const stripAnsi = (s: string) =>
+  s.replace(ANSI_OSC_RE, '').replace(ANSI_STRING_RE, '').replace(ANSI_CSI_RE, '').replace(ANSI_STRAY_ESC_RE, '').replace(CONTROL_RE, '')
 
-export const hasAnsi = (s: string) => s.includes(`${ESC}[`) || s.includes(`${ESC}]`)
+export const sanitizeAnsiForRender = (s: string) =>
+  s
+    .replace(ANSI_OSC_RE, '')
+    .replace(ANSI_STRING_RE, '')
+    .replace(ANSI_CSI_WITH_CMD_RE, (seq, cmd: string) => (cmd === 'm' ? seq : ''))
+    .replace(ANSI_STRAY_ESC_RE, '')
+    .replace(CONTROL_RE, '')
+
+export const hasAnsi = (s: string) => s.includes(ESC)
 
 const renderEstimateLine = (line: string) => {
   const trimmed = line.trim()

From 9b2d58159c70b46214d0ef961168bbc826651663 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 16 May 2026 22:55:42 -0500
Subject: [PATCH 172/218] fix(cli): satisfy ruff encoding requirement in
 send_cmd

Specify utf-8 when reading message bodies from --file paths so the full-repo ruff enforcement check passes in CI.
---
 hermes_cli/send_cmd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hermes_cli/send_cmd.py b/hermes_cli/send_cmd.py
index 451bb3b4964..2d0c3418ea2 100644
--- a/hermes_cli/send_cmd.py
+++ b/hermes_cli/send_cmd.py
@@ -58,7 +58,7 @@ def _read_message_body(
         if file_path == "-":
             return sys.stdin.read()
         try:
-            return Path(file_path).read_text()
+            return Path(file_path).read_text(encoding="utf-8")
         except OSError as exc:
             print(f"hermes send: cannot read {file_path}: {exc}", file=sys.stderr)
             sys.exit(_USAGE_EXIT)

From 7e1788db5d569f61d3aed32f74963208b03835ec Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 16 May 2026 22:58:00 -0500
Subject: [PATCH 173/218] fix(tui): harden ansi sanitizers for dangling CSI

Strip incomplete CSI prefixes before rendering, remove carriage returns from sanitized output, and add regression tests to prevent escape-sequence recomposition across message boundaries.
---
 ui-tui/src/__tests__/text.test.ts | 12 ++++++++++++
 ui-tui/src/lib/text.ts            | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/ui-tui/src/__tests__/text.test.ts b/ui-tui/src/__tests__/text.test.ts
index ffd8f849da2..566d1e41cf6 100644
--- a/ui-tui/src/__tests__/text.test.ts
+++ b/ui-tui/src/__tests__/text.test.ts
@@ -97,12 +97,24 @@ describe('ANSI sanitizers', () => {
     expect(stripAnsi(sample)).toBe('ABCD')
   })
 
+  it('strips incomplete CSI prefixes and carriage returns', () => {
+    const sample = `A${ESC}[31mB${ESC}[12;${ESC}[CD\rE`
+
+    expect(stripAnsi(sample)).toBe('ABDE')
+  })
+
   it('keeps SGR color spans but removes cursor controls for Ansi rendering', () => {
     const sample = `A${ESC}[31mB${ESC}[39m${ESC}[2J${ESC}]0;title${BEL}${ESC}[?25lC`
 
     expect(sanitizeAnsiForRender(sample)).toBe(`A${ESC}[31mB${ESC}[39mC`)
   })
 
+  it('keeps valid SGR while removing dangling CSI and carriage returns', () => {
+    const sample = `A${ESC}[31mB${ESC}[12;${ESC}[39mC\rD`
+
+    expect(sanitizeAnsiForRender(sample)).toBe(`A${ESC}[31mB${ESC}[39mCD`)
+  })
+
   it('detects non-CSI escape prefixes too', () => {
     expect(hasAnsi(`ok${ESC}Ppayload${ESC}\\`)).toBe(true)
   })
diff --git a/ui-tui/src/lib/text.ts b/ui-tui/src/lib/text.ts
index 46dd1f67e15..5a5bdce603d 100644
--- a/ui-tui/src/lib/text.ts
+++ b/ui-tui/src/lib/text.ts
@@ -12,20 +12,30 @@ const ESC = String.fromCharCode(27)
 const BEL = String.fromCharCode(7)
 const ANSI_CSI_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*[@-~]`, 'g')
 const ANSI_CSI_WITH_CMD_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*([@-~])`, 'g')
+const ANSI_INCOMPLETE_CSI_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*(?=${ESC}|\\n|$)`, 'g')
 const ANSI_OSC_RE = new RegExp(`${ESC}\\][\\s\\S]*?(?:${BEL}|${ESC}\\\\)`, 'g')
 const ANSI_STRING_RE = new RegExp(`${ESC}[PX^_][\\s\\S]*?(?:${BEL}|${ESC}\\\\)`, 'g')
 const ANSI_STRAY_ESC_RE = new RegExp(`${ESC}(?!\\[)[\\s\\S]?`, 'g')
-const CONTROL_RE = /[\x00-\x08\x0B\x0C\x0E-\x1A\x1C-\x1F\x7F]/g
+const CONTROL_RE = /[\x00-\x08\x0B\x0C\x0D\x0E-\x1A\x1C-\x1F\x7F]/g
 const WS_RE = /\s+/g
 
 export const stripAnsi = (s: string) =>
-  s.replace(ANSI_OSC_RE, '').replace(ANSI_STRING_RE, '').replace(ANSI_CSI_RE, '').replace(ANSI_STRAY_ESC_RE, '').replace(CONTROL_RE, '')
+  s
+    .replace(ANSI_OSC_RE, '')
+    .replace(ANSI_STRING_RE, '')
+    .replace(ANSI_INCOMPLETE_CSI_RE, '')
+    .replace(ANSI_CSI_RE, '')
+    .replace(ANSI_INCOMPLETE_CSI_RE, '')
+    .replace(ANSI_STRAY_ESC_RE, '')
+    .replace(CONTROL_RE, '')
 
 export const sanitizeAnsiForRender = (s: string) =>
   s
     .replace(ANSI_OSC_RE, '')
     .replace(ANSI_STRING_RE, '')
+    .replace(ANSI_INCOMPLETE_CSI_RE, '')
     .replace(ANSI_CSI_WITH_CMD_RE, (seq, cmd: string) => (cmd === 'm' ? seq : ''))
+    .replace(ANSI_INCOMPLETE_CSI_RE, '')
     .replace(ANSI_STRAY_ESC_RE, '')
     .replace(CONTROL_RE, '')
 

From a65f723e6847f8d326947011b0d6d345d240ce25 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Sat, 16 May 2026 23:00:58 -0500
Subject: [PATCH 174/218] fix(review): address Copilot follow-up on sanitizer
 and file decode errors

Consume multi-byte non-CSI ESC sequences during ANSI sanitization and handle UnicodeDecodeError for `hermes send --file` so review findings are resolved without regressions.
---
 hermes_cli/send_cmd.py            |  2 +-
 tests/hermes_cli/test_send_cmd.py | 13 +++++++++++++
 ui-tui/src/__tests__/text.test.ts |  7 +++++++
 ui-tui/src/lib/text.ts            |  3 +++
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/send_cmd.py b/hermes_cli/send_cmd.py
index 2d0c3418ea2..4cf3198cb40 100644
--- a/hermes_cli/send_cmd.py
+++ b/hermes_cli/send_cmd.py
@@ -59,7 +59,7 @@ def _read_message_body(
             return sys.stdin.read()
         try:
             return Path(file_path).read_text(encoding="utf-8")
-        except OSError as exc:
+        except (OSError, UnicodeDecodeError) as exc:
             print(f"hermes send: cannot read {file_path}: {exc}", file=sys.stderr)
             sys.exit(_USAGE_EXIT)
 
diff --git a/tests/hermes_cli/test_send_cmd.py b/tests/hermes_cli/test_send_cmd.py
index 9202315e3d4..802cff88c90 100644
--- a/tests/hermes_cli/test_send_cmd.py
+++ b/tests/hermes_cli/test_send_cmd.py
@@ -173,6 +173,19 @@ def test_file_not_found_is_usage_error(fake_tool, capsys, monkeypatch):
     assert "cannot read" in err.lower()
 
 
+def test_file_decode_error_is_usage_error(fake_tool, capsys, monkeypatch, tmp_path):
+    monkeypatch.setattr("sys.stdin.isatty", lambda: True)
+    bad = tmp_path / "bad-bytes.bin"
+    bad.write_bytes(b"\xff\xfe\x00")
+
+    args = _parse(["--to", "telegram", "--file", str(bad)])
+    with pytest.raises(SystemExit) as exc:
+        send_cmd.cmd_send(args)
+    assert exc.value.code == 2
+    err = capsys.readouterr().err
+    assert "cannot read" in err.lower()
+
+
 def test_tool_error_returns_failure_exit(monkeypatch, capsys):
     import sys as _sys
     import types as _types
diff --git a/ui-tui/src/__tests__/text.test.ts b/ui-tui/src/__tests__/text.test.ts
index 566d1e41cf6..047ad67912f 100644
--- a/ui-tui/src/__tests__/text.test.ts
+++ b/ui-tui/src/__tests__/text.test.ts
@@ -115,6 +115,13 @@ describe('ANSI sanitizers', () => {
     expect(sanitizeAnsiForRender(sample)).toBe(`A${ESC}[31mB${ESC}[39mCD`)
   })
 
+  it('strips multi-byte non-CSI ESC sequences without leaving trailing bytes', () => {
+    const sample = `A${ESC}(0B${ESC}%GC${ESC})0D`
+
+    expect(stripAnsi(sample)).toBe('ABCD')
+    expect(sanitizeAnsiForRender(sample)).toBe('ABCD')
+  })
+
   it('detects non-CSI escape prefixes too', () => {
     expect(hasAnsi(`ok${ESC}Ppayload${ESC}\\`)).toBe(true)
   })
diff --git a/ui-tui/src/lib/text.ts b/ui-tui/src/lib/text.ts
index 5a5bdce603d..ef3a1816975 100644
--- a/ui-tui/src/lib/text.ts
+++ b/ui-tui/src/lib/text.ts
@@ -15,6 +15,7 @@ const ANSI_CSI_WITH_CMD_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*([@-~])`, 'g')
 const ANSI_INCOMPLETE_CSI_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*(?=${ESC}|\\n|$)`, 'g')
 const ANSI_OSC_RE = new RegExp(`${ESC}\\][\\s\\S]*?(?:${BEL}|${ESC}\\\\)`, 'g')
 const ANSI_STRING_RE = new RegExp(`${ESC}[PX^_][\\s\\S]*?(?:${BEL}|${ESC}\\\\)`, 'g')
+const ANSI_NON_CSI_ESC_SEQ_RE = new RegExp(`${ESC}(?!\\[|\\]|P|X|\\^|_)[ -/]*[0-~]`, 'g')
 const ANSI_STRAY_ESC_RE = new RegExp(`${ESC}(?!\\[)[\\s\\S]?`, 'g')
 const CONTROL_RE = /[\x00-\x08\x0B\x0C\x0D\x0E-\x1A\x1C-\x1F\x7F]/g
 const WS_RE = /\s+/g
@@ -26,6 +27,7 @@ export const stripAnsi = (s: string) =>
     .replace(ANSI_INCOMPLETE_CSI_RE, '')
     .replace(ANSI_CSI_RE, '')
     .replace(ANSI_INCOMPLETE_CSI_RE, '')
+    .replace(ANSI_NON_CSI_ESC_SEQ_RE, '')
     .replace(ANSI_STRAY_ESC_RE, '')
     .replace(CONTROL_RE, '')
 
@@ -36,6 +38,7 @@ export const sanitizeAnsiForRender = (s: string) =>
     .replace(ANSI_INCOMPLETE_CSI_RE, '')
     .replace(ANSI_CSI_WITH_CMD_RE, (seq, cmd: string) => (cmd === 'm' ? seq : ''))
     .replace(ANSI_INCOMPLETE_CSI_RE, '')
+    .replace(ANSI_NON_CSI_ESC_SEQ_RE, '')
     .replace(ANSI_STRAY_ESC_RE, '')
     .replace(CONTROL_RE, '')
 

From 32c3f06a5bf0867d11d309503122f59d0dba75d9 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 22:03:37 -0700
Subject: [PATCH 175/218] docs(readme): remove hermes-eval and Hermes MemPalace
 from Community links (#27271)

Both links were merged from low-risk batch salvage but on review they're
brand-new single-commit personal repos with zero stars/forks and no
track record. README links from us implicitly endorse community
projects; the Community section should have a minimum activity bar
before we link to a repo, not just "the contributor opened a PR."

MemPalace in particular wraps an in-process memory provider, so a
README endorsement carries more risk than a typical docs link.
---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index b934293a8df..abdc66245f3 100644
--- a/README.md
+++ b/README.md
@@ -184,8 +184,6 @@ scripts/run_tests.sh
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
 - 🔌 [computer-use-linux](https://github.com/avifenesh/computer-use-linux) — Linux desktop-control MCP server for Hermes and other MCP hosts, with AT-SPI accessibility trees, Wayland/X11 input, screenshots, and compositor window targeting.
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
-- 🛠️ [hermes-eval](https://github.com/Saurav0989/hermes-eval) — Skill regression testing and trajectory quality scoring. Catch skill drift before it propagates. Exports quality-filtered trajectories in Atropos RL format. GitHub Actions CI template included.
-- 🧠 [Hermes MemPalace](https://github.com/kjames2001/hermes-mempalace) — Native MemPalace memory provider plugin: semantic search, knowledge graph, and diary journaling via ChromaDB.
 
 ---
 

From 4e9cedcd4c6de05a0603a3969b6991fb0836761e Mon Sep 17 00:00:00 2001
From: 0xchainer <109617724+0xchainer@users.noreply.github.com>
Date: Sun, 17 May 2026 01:19:36 +0300
Subject: [PATCH 176/218] fix(gateway): add missing logger definition to
 prevent NameError in _all_platforms

hermes_cli/gateway.py:3702 referenced logger.debug() but 'logger' was
never defined in the module, causing a NameError at runtime if the
try/except around discover_plugins() caught an exception.

Added import logging and logger = logging.getLogger(__name__)
at module level to resolve the undefined name.
---
 hermes_cli/gateway.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index a865bcaf8be..c5303e32799 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -5,6 +5,7 @@ Handles: hermes gateway [run|start|stop|restart|status|install|uninstall|setup]
 """
 
 import asyncio
+import logging
 import os
 import shutil
 import signal
@@ -38,6 +39,7 @@ from hermes_cli.setup import (
 )
 from hermes_cli.colors import Colors, color
 
+logger = logging.getLogger(__name__)
 
 # =============================================================================
 # Process Management (for manual gateway runs)

From 57feef320178ce79070dc7ac9a399d3cf587eca4 Mon Sep 17 00:00:00 2001
From: 0xchainer <109617724+0xchainer@users.noreply.github.com>
Date: Sun, 17 May 2026 02:14:22 +0300
Subject: [PATCH 177/218] test(gateway): add smoke test for logger init
 (regression guard for #27154)

Verify that the module has a logger instance with the correct name,
preventing regression of the NameError fixed in a31d5aff.
---
 tests/hermes_cli/test_gateway.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py
index 225947994d2..20c2ca7cda4 100644
--- a/tests/hermes_cli/test_gateway.py
+++ b/tests/hermes_cli/test_gateway.py
@@ -559,3 +559,9 @@ class TestStopProfileGateway:
         assert calls["kill"] == 1          # one SIGTERM
         assert calls["alive_probes"] == 20 # 20 liveness polls over the 2s window
         assert calls["remove"] == 0
+
+
+def test_module_has_logger():
+    """Verify module has a logger instance (regression guard for #27154)."""
+    assert hasattr(gateway, "logger")
+    assert gateway.logger.name == "hermes_cli.gateway"

From a81cfd0a0a6de1c3028b9c800ae917d9bd7e5162 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 20:27:45 -0700
Subject: [PATCH 178/218] chore(release): map 0xchainer and kronexoi emails for
 upcoming salvages

---
 scripts/release.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index ee4c948f643..07952b63c0e 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1094,6 +1094,8 @@ AUTHOR_MAP = {
     "279959838+BROCCOLO1D@users.noreply.github.com": "BROCCOLO1D",  # PR #26796 (docs: spotify + HA)
     "m@matthewlai.ca": "matthewlai",  # PR #25293 (feat: gemma 4 reasoning allowlist)
     "4296245+matthewlai@users.noreply.github.com": "matthewlai",
+    "109617724+0xchainer@users.noreply.github.com": "0xchainer",  # PR #27154/27138/27147 salvage
+    "201800237+kronexoi@users.noreply.github.com": "kronexoi",  # PR #27167 salvage (Teams port fallback)
 }
 
 
From 60531889d56a9a9d2b3f0b9ee04aea4145ede392 Mon Sep 17 00:00:00 2001
From: 0xchainer <109617724+0xchainer@users.noreply.github.com>
Date: Sun, 17 May 2026 00:35:05 +0300
Subject: [PATCH 179/218] fix: remove unused import and hoist module-level
 constant

- Remove unused  from tools/tts_tool.py (dead code)
- Move _BUILTIN_DELIVER_PLATFORMS set from send() method to module
  scope in gateway/platforms/webhook.py to avoid reallocation on
  every call
---
 gateway/platforms/webhook.py | 13 +++++++------
 tools/tts_tool.py            |  1 -
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py
index 83aa93e94cb..d7714ff5652 100644
--- a/gateway/platforms/webhook.py
+++ b/gateway/platforms/webhook.py
@@ -54,6 +54,13 @@ from gateway.platforms.base import (
 
 logger = logging.getLogger(__name__)
 
+_BUILTIN_DELIVER_PLATFORMS = {
+    "telegram", "discord", "slack", "signal", "sms", "whatsapp",
+    "matrix", "mattermost", "homeassistant", "email", "dingtalk",
+    "feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
+    "qqbot", "yuanbao",
+}
+
 DEFAULT_HOST = "0.0.0.0"
 DEFAULT_PORT = 8644
 _INSECURE_NO_AUTH = "INSECURE_NO_AUTH"
@@ -238,12 +245,6 @@ class WebhookAdapter(BasePlatformAdapter):
 
         # Cross-platform delivery — any platform with a gateway adapter.
         # Check both built-in names and plugin-registered platforms.
-        _BUILTIN_DELIVER_PLATFORMS = {
-            "telegram", "discord", "slack", "signal", "sms", "whatsapp",
-            "matrix", "mattermost", "homeassistant", "email", "dingtalk",
-            "feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
-            "qqbot", "yuanbao",
-        }
         _is_known_platform = deliver_type in _BUILTIN_DELIVER_PLATFORMS
         if not _is_known_platform:
             try:
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 57907f76833..9e46fa6a7ef 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -44,7 +44,6 @@ import queue
 import re
 import shlex
 import shutil
-import signal
 import subprocess
 import tempfile
 import threading

From 4b17c2411ab2518fabe9d87872a03a47d3b8cfcc Mon Sep 17 00:00:00 2001
From: 0xchainer <109617724+0xchainer@users.noreply.github.com>
Date: Sun, 17 May 2026 01:08:53 +0300
Subject: [PATCH 180/218] fix(skills): return None instead of truthy stub when
 skill load fails

build_skill_invocation_message() returns a non-empty placeholder string
('[Failed to load skill: ...]') when the skill exists in the command cache
but loading the actual SKILL.md payload fails. CLI/gateway callers treat
any truthy return value as success, so the failure is silently routed into
the model as if it were a valid skill prompt.

Return None instead, matching the existing behavior for unknown commands,
so callers using 'if msg:' can properly detect the failure.
---
 agent/skill_commands.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agent/skill_commands.py b/agent/skill_commands.py
index c8b7d039c46..42e7c857434 100644
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -425,7 +425,7 @@ def build_skill_invocation_message(
 
     loaded = _load_skill_payload(skill_info["skill_dir"], task_id=task_id)
     if not loaded:
-        return f"[Failed to load skill: {skill_info['name']}]"
+        return None
 
     loaded_skill, skill_dir, skill_name = loaded
 

From 782d743730e3df193c5969bc9897350fa14429cb Mon Sep 17 00:00:00 2001
From: 0xchainer <109617724+0xchainer@users.noreply.github.com>
Date: Sun, 17 May 2026 01:57:34 +0300
Subject: [PATCH 181/218] test(skills): add regression test for skill load
 failure returning None

Add test_returns_none_when_skill_load_fails to verify that
build_skill_invocation_message() returns None when a registered
skill exists in the command cache but _load_skill_payload() fails.
This guards against regression of the fix in 877d01b.
---
 tests/agent/test_skill_commands.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py
index bbecd5c43f6..c11976ef978 100644
--- a/tests/agent/test_skill_commands.py
+++ b/tests/agent/test_skill_commands.py
@@ -466,6 +466,14 @@ Generate some audio.
             msg = build_skill_invocation_message("/nonexistent")
         assert msg is None
 
+    def test_returns_none_when_skill_load_fails(self, tmp_path):
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_skill(tmp_path, "broken-skill")
+            scan_skill_commands()
+            with patch("agent.skill_commands._load_skill_payload", return_value=None):
+                msg = build_skill_invocation_message("/broken-skill", "do stuff")
+        assert msg is None
+
     def test_uses_shared_skill_loader_for_secure_setup(self, tmp_path, monkeypatch):
         monkeypatch.delenv("TENOR_API_KEY", raising=False)
         calls = []

From 1eadb069c794ecf0626ff5c785191e604ad1f6bc Mon Sep 17 00:00:00 2001
From: shellybotmoyer <258858106+shellybotmoyer@users.noreply.github.com>
Date: Sat, 16 May 2026 22:51:17 -0700
Subject: [PATCH 182/218] fix(kanban): --severity filter uses >= comparison per
 documented behavior (#26379)

---
 hermes_cli/kanban.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py
index 76f95db4fac..b4024e2e70e 100644
--- a/hermes_cli/kanban.py
+++ b/hermes_cli/kanban.py
@@ -1403,7 +1403,7 @@ def _cmd_diagnostics(args: argparse.Namespace) -> int:
         sev = getattr(args, "severity", None)
         if sev:
             for tid in list(diags_by_task.keys()):
-                kept = [d for d in diags_by_task[tid] if d.severity == sev]
+                kept = [d for d in diags_by_task[tid] if kd.SEVERITY_ORDER.index(d.severity) >= kd.SEVERITY_ORDER.index(sev)]
                 if kept:
                     diags_by_task[tid] = kept
                 else:

From 8d756a421071d0dfd507226c03430413b6de9491 Mon Sep 17 00:00:00 2001
From: austrian_guy <33156212+ether-btc@users.noreply.github.com>
Date: Sat, 16 May 2026 00:56:33 +0200
Subject: [PATCH 183/218] fix(run_agent): guard memory provider init against
 empty/whitespace string

---
 run_agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run_agent.py b/run_agent.py
index b10a68cf9d0..41af78f16c3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2061,7 +2061,7 @@ class AIAgent:
             try:
                 _mem_provider_name = mem_config.get("provider", "") if mem_config else ""
 
-                if _mem_provider_name:
+                if _mem_provider_name and _mem_provider_name.strip():
                     from agent.memory_manager import MemoryManager as _MemoryManager
                     from plugins.memory import load_memory_provider as _load_mem
                     self._memory_manager = _MemoryManager()

From 7282ef1b9d4ba9b77057f53ca92cbb2ed674129b Mon Sep 17 00:00:00 2001
From: LifeJiggy <Bloomtonjovish@gmail.com>
Date: Fri, 15 May 2026 19:39:29 +0100
Subject: [PATCH 184/218] fix: add paste collapse logging to aid debugging

Adds logger.info when large pastes are collapsed to file
references in both paste-code paths (handle_paste and
_on_text_changed). Logs paste ID, line count, character
count, and file path so operators can correlate missing-
content reports with specific paste files. This is a
diagnostic aid, not a fix for the paste-drop issue.
---
 cli.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cli.py b/cli.py
index c1ba1c0ddd2..42b1482578e 100644
--- a/cli.py
+++ b/cli.py
@@ -12604,6 +12604,7 @@ class HermesCLI:
                     paste_dir.mkdir(parents=True, exist_ok=True)
                     paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt"
                     paste_file.write_text(pasted_text, encoding="utf-8")
+                    logger.info("Collapsed paste #%d: %d lines, %d chars -> %s", _paste_counter[0], line_count + 1, len(pasted_text), paste_file)
                     placeholder = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]"
                     prefix = ""
                     if buf.cursor_position > 0 and buf.text[buf.cursor_position - 1] != '\n':
@@ -12771,6 +12772,7 @@ class HermesCLI:
                 paste_dir.mkdir(parents=True, exist_ok=True)
                 paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt"
                 paste_file.write_text(text, encoding="utf-8")
+                logger.info("Collapsed paste #%d: %d lines, %d chars -> %s (fallback)", _paste_counter[0], line_count + 1, len(text), paste_file)
                 _paste_just_collapsed[0] = True
                 buf.text = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]"
                 buf.cursor_position = len(buf.text)

From 4279da4db62a8a3cba7004c6fb1e1e53ea2c6a29 Mon Sep 17 00:00:00 2001
From: nekwo <beastant1@gmail.com>
Date: Fri, 15 May 2026 12:41:10 -0400
Subject: [PATCH 185/218] fix(windows): make PowerShell installer parse in 5.1

---
 scripts/install.ps1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 5ed7aa755fd..53a1ea96486 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -1,4 +1,4 @@
-# ============================================================================
+﻿# ============================================================================
 # Hermes Agent Installer for Windows
 # ============================================================================
 # Installation script for Windows (PowerShell).

From 7d09bb19155275a18b3952a1f5f399d1dd87df24 Mon Sep 17 00:00:00 2001
From: flooryyyy <67979730+flooryyyy@users.noreply.github.com>
Date: Fri, 15 May 2026 15:01:32 +0100
Subject: [PATCH 186/218] fix(delegate): tool_trace false-positive error
 detection for short outputs

---
 tools/delegate_tool.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index 136ea63ac40..e9ad32e0d3a 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -1649,7 +1649,7 @@ def _run_single_child(
                             trace_by_id[tc_id] = entry_t
                 elif msg.get("role") == "tool":
                     content = msg.get("content", "")
-                    is_error = bool(content and "error" in content[:80].lower())
+                    is_error = _looks_like_error_output(content)
                     result_meta = {
                         "result_bytes": len(content),
                         "status": "error" if is_error else "ok",

From 508b022acb85b5b1395945e1b5f27e4706a81853 Mon Sep 17 00:00:00 2001
From: dgians <188585318+dgians@users.noreply.github.com>
Date: Sat, 16 May 2026 22:51:17 -0700
Subject: [PATCH 187/218] feat(gateway): add .ts/.py/.sh to
 SUPPORTED_DOCUMENT_TYPES

The gateway already accepts plain-text config files (.ini, .cfg) and
structured formats (.json, .yaml, .toml) as documents, but not common
source-file extensions. Sending a .ts/.py/.sh file currently requires
renaming it to .txt first.

Adds .ts, .py, .sh as text/plain, consistent with the existing
.ini/.cfg entries.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 gateway/platforms/base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index c6bdc38c3b9..7b3147e21f4 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -829,6 +829,9 @@ SUPPORTED_DOCUMENT_TYPES = {
     ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
     ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    ".ts": "text/plain",
+    ".py": "text/plain",
+    ".sh": "text/plain",
 }
 
 
From 1a4e64ba06d071551ae8266ea809de0e412b45c8 Mon Sep 17 00:00:00 2001
From: shellybotmoyer <258858106+shellybotmoyer@users.noreply.github.com>
Date: Sat, 16 May 2026 22:51:17 -0700
Subject: [PATCH 188/218] fix(credential_pool): parse ISO-string last_status_at
 during from_dict rehydration (#25516)

---
 agent/credential_pool.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index 504742145c1..7f27873a7fb 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -129,6 +129,9 @@ class PooledCredential:
     def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential":
         field_names = {f.name for f in fields(cls) if f.name != "provider"}
         data = {k: payload.get(k) for k in field_names if k in payload}
+        # Rehydrated last_status_at may be an ISO string from to_dict() — normalize to float epoch
+        if "last_status_at" in data and isinstance(data["last_status_at"], str):
+            data["last_status_at"] = _parse_absolute_timestamp(data["last_status_at"])
         extra = {k: payload[k] for k in _EXTRA_KEYS if k in payload and payload[k] is not None}
         data["extra"] = extra
         data.setdefault("id", uuid.uuid4().hex[:6])

From 5f72dd817ec2709f6fda323ef96dc55e21dd3d0d Mon Sep 17 00:00:00 2001
From: flanny7 <mottei.survive@gmail.com>
Date: Sun, 17 May 2026 02:41:29 +0900
Subject: [PATCH 189/218] fix(install): use resolved python variable in
 setup_open_webui.sh

The install_open_webui function correctly resolved the python interpreter into the $py variable, but hardcoded 'python' in subsequent pip install commands. This caused 'command not found' or 'externally-managed-environment' errors on systems where 'python' is not implicitly aliased to 'python3'.
---
 scripts/setup_open_webui.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/setup_open_webui.sh b/scripts/setup_open_webui.sh
index 0cca44ddd71..9975c911f3f 100755
--- a/scripts/setup_open_webui.sh
+++ b/scripts/setup_open_webui.sh
@@ -163,8 +163,8 @@ install_open_webui() {
   "$py" -m venv "$OPEN_WEBUI_VENV"
   # shellcheck disable=SC1090
   source "$OPEN_WEBUI_VENV/bin/activate"
-  python -m pip install --upgrade pip setuptools wheel
-  python -m pip install open-webui
+  "$py" -m pip install --upgrade pip setuptools wheel
+  "$py" -m pip install open-webui
 }
 
 write_launcher() {

From 9a9f8a6d9945c9bf3118c557f85ad1956de4f553 Mon Sep 17 00:00:00 2001
From: hermesagent26 <276067471+hermesagent26@users.noreply.github.com>
Date: Sat, 16 May 2026 22:51:17 -0700
Subject: [PATCH 190/218] fix(run_agent): detect kimi models via model name for
 reasoning pad
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 previously only checked provider ID and
base URL. When kimi-k2.6 is served via ollama-cloud (or any third-party
provider), provider is not 'kimi-coding' and base URL is not
api.kimi.com — so reasoning_content pad was never injected. This caused
HTTP 400 from Ollama Cloud's Go backend: 'invalid message content type:
map[string]interface {}'.

Fix: add model-name detection ('kimi' in model.lower()) so any route
serving a kimi model gets the required reasoning_content echo-back.

Refs the 400/401 Telegram errors where kimi-k2.6 via ollama-cloud
consistently failed after tool-call turns.
---
 run_agent.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 41af78f16c3..b239f2aeb60 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -10437,12 +10437,16 @@ class AIAgent:
         Kimi ``/coding`` and Moonshot thinking mode both require
         ``reasoning_content`` on every assistant tool-call message; omitting
         it causes the next replay to fail with HTTP 400.
+
+        Also detects Kimi models served through third-party providers (e.g.
+        ollama-cloud) by matching ``kimi`` in the model name.
         """
         return (
             self.provider in {"kimi-coding", "kimi-coding-cn"}
             or base_url_host_matches(self.base_url, "api.kimi.com")
             or base_url_host_matches(self.base_url, "moonshot.ai")
             or base_url_host_matches(self.base_url, "moonshot.cn")
+            or "kimi" in (self.model or "").lower()
         )
 
     def _needs_deepseek_tool_reasoning(self) -> bool:

From 773a0faca0888df7b1ea310c554b70a18710813c Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 22:52:28 -0700
Subject: [PATCH 191/218] fix(deepseek): set default_aux_model on profile so
 aux warning stops firing

Closes #26924 (and supersedes #26926) in spirit.

DeepSeek was missing `default_aux_model` on its `ProviderProfile`, so
`_get_aux_model_for_provider("deepseek")` returned an empty string and
the compression / vision / session-search paths emitted

  "No auxiliary LLM provider configured -- context compression will
  drop middle turns without a summary."

on every DeepSeek session, even when the user had perfectly working
DeepSeek credentials.

Fix lands at the profile layer rather than the legacy
`_API_KEY_PROVIDER_AUX_MODELS_FALLBACK` dict the original PR targeted.
Every modern provider (gemini, zai, minimax, anthropic, kimi-coding,
stepfun, ollama-cloud, gmi, novita, kilocode, ai-gateway, opencode-zen)
sets `default_aux_model` on its `ProviderProfile`; the fallback dict
only exists for providers that predate the profiles system.

Tests added under `tests/plugins/model_providers/test_deepseek_profile.py`:
- `test_profile_advertises_deepseek_chat`  -- pins the profile attribute
- `test_consumer_api_returns_deepseek_chat` -- pins the consumer API behavior
- `test_consumer_api_returns_non_empty`     -- regression guard for the
  symptom in the issue

Original diagnosis and aux-model choice from @kriscolab in PR #26926;
moved one layer up.

Co-authored-by: kriscolab <71590782+kriscolab@users.noreply.github.com>
---
 plugins/model-providers/deepseek/__init__.py  |  1 +
 .../model_providers/test_deepseek_profile.py  | 23 +++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/plugins/model-providers/deepseek/__init__.py b/plugins/model-providers/deepseek/__init__.py
index f67146df113..525766f87eb 100644
--- a/plugins/model-providers/deepseek/__init__.py
+++ b/plugins/model-providers/deepseek/__init__.py
@@ -94,6 +94,7 @@ deepseek = DeepSeekProfile(
         "deepseek-reasoner",
     ),
     base_url="https://api.deepseek.com/v1",
+    default_aux_model="deepseek-chat",
 )
 
 register_provider(deepseek)
diff --git a/tests/plugins/model_providers/test_deepseek_profile.py b/tests/plugins/model_providers/test_deepseek_profile.py
index c53e70070a8..8c316a38086 100644
--- a/tests/plugins/model_providers/test_deepseek_profile.py
+++ b/tests/plugins/model_providers/test_deepseek_profile.py
@@ -182,3 +182,26 @@ class TestDeepSeekFullKwargsIntegration:
         )
         assert "reasoning_effort" not in kwargs
         assert "extra_body" not in kwargs or "thinking" not in kwargs.get("extra_body", {})
+
+
+class TestDeepSeekAuxModel:
+    """DeepSeek aux model is set on the profile so users stop seeing the
+    bogus 'No auxiliary LLM provider configured' warning (#26924).
+
+    Pinned at the profile layer rather than the legacy
+    `_API_KEY_PROVIDER_AUX_MODELS_FALLBACK` dict — new providers are
+    expected to set `default_aux_model` on `ProviderProfile`, and the
+    fallback dict only exists for providers that predate the profiles
+    system.
+    """
+
+    def test_profile_advertises_deepseek_chat(self, deepseek_profile):
+        assert deepseek_profile.default_aux_model == "deepseek-chat"
+
+    def test_consumer_api_returns_deepseek_chat(self):
+        from agent.auxiliary_client import _get_aux_model_for_provider
+        assert _get_aux_model_for_provider("deepseek") == "deepseek-chat"
+
+    def test_consumer_api_returns_non_empty(self):
+        from agent.auxiliary_client import _get_aux_model_for_provider
+        assert _get_aux_model_for_provider("deepseek") != ""

From e90a52deafcca5c6b1fc06b0ef427348ec796077 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 22:53:29 -0700
Subject: [PATCH 192/218] chore(release): AUTHOR_MAP entries for batch salvage
 group 2 contributors

Adds release-note attribution mappings for 10 contributors from the
low-hanging-fruit salvage group 2 batch:
- @shellybotmoyer (PR #26661, #25576)
- @ether-btc (PR #26632)
- @LifeJiggy (PR #26516)
- @nekwo (PR #26481)
- @flooryyyy (PR #26374)
- @dgians (PR #26034, incl. zealy-tzco bot-committer alias)
- @flanny7 (PR #27030)
- @hermesagent26 (PR #26438)
- @kriscolab (PR #26926, co-author on salvage commit)
---
 scripts/release.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 07952b63c0e..6bbc2ad4ae3 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1096,6 +1096,23 @@ AUTHOR_MAP = {
     "4296245+matthewlai@users.noreply.github.com": "matthewlai",
     "109617724+0xchainer@users.noreply.github.com": "0xchainer",  # PR #27154/27138/27147 salvage
     "201800237+kronexoi@users.noreply.github.com": "kronexoi",  # PR #27167 salvage (Teams port fallback)
+    # batch salvage (May 2026 LHF run, group 2)
+    "shellybotmoyer@example.com": "shellybotmoyer",  # PR #26661 (kanban --severity >=)
+    "coulson@shellybotmoyer.com": "shellybotmoyer",  # PR #25576 (credential_pool ISO rehydrate)
+    "258858106+shellybotmoyer@users.noreply.github.com": "shellybotmoyer",
+    "33156212+ether-btc@users.noreply.github.com": "ether-btc",  # PR #26632 (memory provider whitespace guard)
+    "Bloomtonjovish@gmail.com": "LifeJiggy",  # PR #26516 (paste collapse logging)
+    "141562589+LifeJiggy@users.noreply.github.com": "LifeJiggy",
+    "beastant1@gmail.com": "nekwo",  # PR #26481 (PS5.1 UTF-8 BOM)
+    "43717185+nekwo@users.noreply.github.com": "nekwo",
+    "67979730+flooryyyy@users.noreply.github.com": "flooryyyy",  # PR #26374 (tool_trace error detection)
+    "188585318+dgians@users.noreply.github.com": "dgians",  # PR #26034 (.ts/.py/.sh docs types)
+    "zealy@tz.co": "dgians",  # PR #26034 (bot-committed by zealy-tzco under dgians' PR)
+    "mottei.survive@gmail.com": "flanny7",  # PR #27030 (setup_open_webui python var)
+    "20530505+flanny7@users.noreply.github.com": "flanny7",
+    "hermesagent26@gmail.com": "hermesagent26",  # PR #26438 (kimi model-name reasoning pad)
+    "276067471+hermesagent26@users.noreply.github.com": "hermesagent26",
+    "71590782+kriscolab@users.noreply.github.com": "kriscolab",  # PR #26926 (deepseek default_aux_model)
 }
 
 
From ea2ee51f0b40eac51e279e3ac746f99c2b38e4c0 Mon Sep 17 00:00:00 2001
From: kronexoi <201800237+kronexoi@users.noreply.github.com>
Date: Sun, 17 May 2026 02:29:49 +0300
Subject: [PATCH 193/218] fix(teams): fall back to default port on invalid port
 config

---
 plugins/platforms/teams/adapter.py | 11 ++++++++++-
 tests/gateway/test_teams.py        | 11 +++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/plugins/platforms/teams/adapter.py b/plugins/platforms/teams/adapter.py
index 990d03bb499..c71baeb9d93 100644
--- a/plugins/platforms/teams/adapter.py
+++ b/plugins/platforms/teams/adapter.py
@@ -116,6 +116,13 @@ def _parse_bool(value: Any, *, default: bool = False) -> bool:
     return default
 
 
+def _coerce_port(value: Any, *, default: int = _DEFAULT_PORT) -> int:
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return default
+
+
 class _StaticAccessTokenProvider:
     """Minimal token-provider shim so outbound Graph delivery can reuse the shared client."""
 
@@ -623,7 +630,9 @@ class TeamsAdapter(BasePlatformAdapter):
         self._client_id = extra.get("client_id") or os.getenv("TEAMS_CLIENT_ID", "")
         self._client_secret = extra.get("client_secret") or os.getenv("TEAMS_CLIENT_SECRET", "")
         self._tenant_id = extra.get("tenant_id") or os.getenv("TEAMS_TENANT_ID", "")
-        self._port = int(extra.get("port") or os.getenv("TEAMS_PORT", str(_DEFAULT_PORT)))
+        self._port = _coerce_port(
+            extra.get("port") or os.getenv("TEAMS_PORT", str(_DEFAULT_PORT))
+        )
         self._app: Optional["App"] = None
         self._runner: Optional["web.AppRunner"] = None
         self._dedup = MessageDeduplicator(max_size=1000)
diff --git a/tests/gateway/test_teams.py b/tests/gateway/test_teams.py
index 34cd0ca3eed..58b8c35a5c2 100644
--- a/tests/gateway/test_teams.py
+++ b/tests/gateway/test_teams.py
@@ -283,6 +283,17 @@ class TestTeamsAdapterInit:
         adapter = TeamsAdapter(_make_config(client_id="id", client_secret="secret", tenant_id="tenant"))
         assert adapter._port == 5000
 
+    def test_invalid_port_from_extra_falls_back_to_default(self):
+        adapter = TeamsAdapter(
+            _make_config(client_id="id", client_secret="secret", tenant_id="tenant", port="abc")
+        )
+        assert adapter._port == 3978
+
+    def test_invalid_port_from_env_falls_back_to_default(self, monkeypatch):
+        monkeypatch.setenv("TEAMS_PORT", "abc")
+        adapter = TeamsAdapter(_make_config(client_id="id", client_secret="secret", tenant_id="tenant"))
+        assert adapter._port == 3978
+
     def test_platform_value(self):
         adapter = TeamsAdapter(_make_config(client_id="id", client_secret="secret", tenant_id="tenant"))
         assert adapter.platform.value == "teams"

From e5f19af2a5cfed9ec7f6ea1e1f770f8a8b342de3 Mon Sep 17 00:00:00 2001
From: emozilla <emozilla@nousresearch.com>
Date: Sun, 17 May 2026 00:47:21 -0400
Subject: [PATCH 194/218] feat(install.ps1): stage protocol + Windows clean-VM
 hardening pass

Adds an opt-in stage protocol that lets programmatic drivers (the
desktop GUI's onboarding wizard, CI, future install.sh parity) drive
install.ps1 one step at a time with structured JSON results. Default
invocation (`irm | iex` one-liner) behaves unchanged.

Entry points:
  install.ps1                  Today's interactive install (unchanged)
  install.ps1 -ProtocolVersion Emit protocol version integer
  install.ps1 -Manifest        Emit JSON manifest of available stages
  install.ps1 -Stage <name>    Run one stage, emit JSON result
  install.ps1 -NonInteractive  Suppress Read-Host prompts (skips the
                               setup wizard and gateway autostart)
  install.ps1 -Json            Machine-readable completion frame

Manifest exposes 14 stages across prereqs/install/finalize/post-install
categories, with 2 (configure, gateway) flagged needs_user_input=true
so GUI drivers can skip them and handle the equivalent UX themselves.

Along the way, clean-VM testing on stock Windows 10/11 surfaced a
series of latent install.ps1 bugs that were never exercised by
developer machines. Fixed in the same commit:

* Encoding: file is now pure ASCII with no BOM. Windows PowerShell
  5.1 reads BOM-less files as Windows-1252 and chokes on em-dashes
  (and other UTF-8 sequences), while iex chokes on a leading U+FEFF.
  Pure-ASCII satisfies both invocation paths.

* EAP=Stop + native `2>&1` captures: PowerShell wraps stderr lines
  from native commands as ErrorRecord objects under EAP=Stop and
  throws even when the command exits 0. Relaxed to EAP=Continue
  around the astral.sh uv installer, `uv python install`, `npm
  install`, `npx playwright install`, the venv import probes, and
  the Node winget fallback. Check $LASTEXITCODE for the real signal.

* Cross-process state: each `-Stage <name>` invocation spawns a
  fresh powershell child. $script:UvCmd set by Stage-Uv was invisible
  to Stage-Python; PATH updated by Stage-Git/Stage-Node was invisible
  to subsequent stages spawned by the driver shell. Added Resolve-UvCmd
  helper called at the top of every stage that needs uv, and a
  Sync-EnvPath helper called at the top of Invoke-Stage to refresh
  PATH from the registry.

* UAC avoidance: `winget install OpenJS.NodeJS.LTS` triggers a UAC
  prompt that often appears minimized in the taskbar -- looks like a
  hang. Switched Test-Node to prefer the official portable Node zip
  dropped into %LOCALAPPDATA%\hermes\node\ (mirrors the PortableGit
  pattern Install-Git already uses). winget kept as fallback.

* npx hangs on confirmation: `npx playwright install chromium` blocks
  on stdin waiting for "Need to install playwright@X.Y.Z (y/N)" when
  playwright isn't in local node_modules. Tee-Object pipelines
  disconnect stdin from the user's TTY so the install hangs forever.
  Pass `--yes` to auto-accept.

* Silent long-running installs: `*> $logPath` redirected every stream
  to disk and left the user staring at a frozen "Installing..." line
  for the 5-10 minutes Playwright Chromium takes to download. Switched
  to `2>&1 | ForEach-Object { "$_" } | Tee-Object -FilePath $log` so
  output streams live to the console AND captures to log for failure
  diagnostics. ForEach-Object coercion strips PowerShell's red
  NativeCommandError formatter from stderr items.

* Console encoding: forced [Console]::OutputEncoding to UTF-8 so
  playwright/git/npm progress bars, box-drawing, and check marks render
  correctly instead of as IBM437/Windows-1252 mojibake.

* Performance: set $ProgressPreference = "SilentlyContinue" so
  Invoke-WebRequest doesn't paint its per-chunk progress bar. The
  PS 5.1 progress UI throttles downloads by 10-100x (a 57MB PortableGit
  grab takes 5 minutes with the bar on vs ~20 seconds with it off,
  same network). Affects PortableGit, Node portable zip, and the
  Hermes repo zip fallback.

Tests: scripts/tests/test-install-ps1-stage-protocol.ps1 provides 19
metadata-only assertions covering -ProtocolVersion, -Manifest schema,
and unknown -Stage error frame. No install side effects.

End-to-end validated on a clean Windows 10 VM via:
  1. `irm <branch>/scripts/install.ps1 | iex` (canonical CLI path)
  2. `powershell -File install.ps1 -Stage X` iterated through every
     stage (GUI driver path, exercises cross-process fixes)
---
 scripts/install.ps1                           | 732 ++++++++++++++----
 .../tests/test-install-ps1-stage-protocol.ps1 | 134 ++++
 2 files changed, 736 insertions(+), 130 deletions(-)
 create mode 100644 scripts/tests/test-install-ps1-stage-protocol.ps1

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 53a1ea96486..4d7545ca689 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -17,11 +17,49 @@ param(
     [switch]$SkipSetup,
     [string]$Branch = "main",
     [string]$HermesHome = "$env:LOCALAPPDATA\hermes",
-    [string]$InstallDir = "$env:LOCALAPPDATA\hermes\hermes-agent"
+    [string]$InstallDir = "$env:LOCALAPPDATA\hermes\hermes-agent",
+
+    # --- Stage protocol (additive; default invocation behaves as before) ----
+    # See the "Stage protocol" section near the bottom of the file for the
+    # full contract.  Intended for programmatic drivers (the desktop GUI's
+    # onboarding wizard, CI, future install.sh parity, etc.).  CLI users
+    # running the canonical `irm | iex` one-liner never touch these flags.
+    [switch]$Manifest,
+    [string]$Stage,
+    [switch]$ProtocolVersion,
+    [switch]$NonInteractive,
+    [switch]$Json
 )
 
 $ErrorActionPreference = "Stop"
 
+# Suppress Invoke-WebRequest's per-chunk progress bar.  Windows PowerShell
+# 5.1's progress UI repaints synchronously on every received byte, which
+# pegs CPU on a single core and throttles downloads by 10-100x (a 57MB
+# PortableGit grab can take 5 minutes with progress on vs 20 seconds
+# with progress off, on the same network).  Every IWR call in this
+# script is fire-and-forget so we never need to see the bar.  Restored
+# automatically when the script exits.
+$ProgressPreference = "SilentlyContinue"
+
+# Force the console to UTF-8 so non-ASCII output from native commands
+# (e.g. playwright's box-drawing progress bars and download banners,
+# git's bullet glyphs, npm's check marks) renders correctly instead of
+# as IBM437/Windows-1252 mojibake (sequences like 0xE2 0x95 0x94 box-
+# drawing chars decoded under the legacy DOS codepage).  This is a
+# DISPLAY-only fix; the underlying bytes are already correct.  We do
+# NOT change the file's own encoding (it remains pure ASCII for PS 5.1
+# parser compatibility; see comments at the top of the entry-point
+# dispatch).  This affects only what the user sees in their terminal
+# during this install run, and reverts automatically when the script
+# exits and the host's console encoding is restored.
+try {
+    [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new()
+} catch {
+    # Some constrained PowerShell hosts disallow encoding mutation.
+    # Mojibake on output is then cosmetic-only, install still works.
+}
+
 # ============================================================================
 # Configuration
 # ============================================================================
@@ -31,38 +69,43 @@ $RepoUrlHttps = "https://github.com/NousResearch/hermes-agent.git"
 $PythonVersion = "3.11"
 $NodeVersion = "22"
 
+# Stage-protocol version.  Bumped only for genuinely breaking changes to the
+# manifest schema, stage-name set semantics, or stdout JSON shape.  Adding a
+# new stage does NOT bump this -- drivers iterate the manifest dynamically.
+$InstallStageProtocolVersion = 1
+
 # ============================================================================
 # Helper functions
 # ============================================================================
 
 function Write-Banner {
     Write-Host ""
-    Write-Host "┌─────────────────────────────────────────────────────────┐" -ForegroundColor Magenta
-    Write-Host "│             ⚕ Hermes Agent Installer                    │" -ForegroundColor Magenta
-    Write-Host "├─────────────────────────────────────────────────────────┤" -ForegroundColor Magenta
-    Write-Host "│  An open source AI agent by Nous Research.              │" -ForegroundColor Magenta
-    Write-Host "└─────────────────────────────────────────────────────────┘" -ForegroundColor Magenta
+    Write-Host "+---------------------------------------------------------+" -ForegroundColor Magenta
+    Write-Host "|             * Hermes Agent Installer                    |" -ForegroundColor Magenta
+    Write-Host "+---------------------------------------------------------+" -ForegroundColor Magenta
+    Write-Host "|  An open source AI agent by Nous Research.              |" -ForegroundColor Magenta
+    Write-Host "+---------------------------------------------------------+" -ForegroundColor Magenta
     Write-Host ""
 }
 
 function Write-Info {
     param([string]$Message)
-    Write-Host "→ $Message" -ForegroundColor Cyan
+    Write-Host "-> $Message" -ForegroundColor Cyan
 }
 
 function Write-Success {
     param([string]$Message)
-    Write-Host "✓ $Message" -ForegroundColor Green
+    Write-Host "[OK] $Message" -ForegroundColor Green
 }
 
 function Write-Warn {
     param([string]$Message)
-    Write-Host "⚠ $Message" -ForegroundColor Yellow
+    Write-Host "[!] $Message" -ForegroundColor Yellow
 }
 
 function Write-Err {
     param([string]$Message)
-    Write-Host "✗ $Message" -ForegroundColor Red
+    Write-Host "[X] $Message" -ForegroundColor Red
 }
 
 # ============================================================================
@@ -97,8 +140,22 @@ function Install-Uv {
     # Install uv
     Write-Info "Installing uv (fast Python package manager)..."
     try {
+        # Relax ErrorActionPreference around the nested astral installer.
+        # The astral installer (a separate `powershell -c "irm ... | iex"`)
+        # writes download progress to stderr.  With $ErrorActionPreference
+        # = "Stop" set at the top of this script, PowerShell wraps stderr
+        # lines from native commands (which `powershell -c` is, from our
+        # perspective) as ErrorRecord objects when captured via 2>&1, then
+        # throws a terminating exception on the first one -- even though
+        # uv installs successfully and the child exits 0.  Same fix
+        # pattern Test-Python uses for `uv python install`; verify success
+        # via Test-Path on the expected binary afterwards, which is more
+        # reliable than exit-code/stderr signal anyway.
+        $prevEAP = $ErrorActionPreference
+        $ErrorActionPreference = "Continue"
         powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 2>&1 | Out-Null
-        
+        $ErrorActionPreference = $prevEAP
+
         # Find the installed binary
         $uvExe = "$env:USERPROFILE\.local\bin\uv.exe"
         if (-not (Test-Path $uvExe)) {
@@ -123,12 +180,78 @@ function Install-Uv {
         Write-Info "Try restarting your terminal and re-running"
         return $false
     } catch {
-        Write-Err "Failed to install uv"
+        # Restore EAP in case the try block threw before the assignment
+        if ($prevEAP) { $ErrorActionPreference = $prevEAP }
+        Write-Err "Failed to install uv: $_"
         Write-Info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
         return $false
     }
 }
 
+# Refresh $env:Path from the User + Machine registry hives.  Stage drivers
+# invoke each stage in a fresh powershell process, but those processes
+# inherit env from the parent driver shell, NOT from the registry.  When
+# an earlier stage (Stage-Git, Stage-Node, ...) installs a binary and
+# pushes its directory into User PATH, the next child process's $env:Path
+# is stale and the binary appears missing.  This helper re-reads PATH
+# from the registry so every Invoke-Stage starts from a fresh, up-to-date
+# PATH view.  Cheap (registry reads, no I/O elsewhere) and idempotent.
+function Sync-EnvPath {
+    $env:Path = [Environment]::GetEnvironmentVariable("Path", "User") + ";" + [Environment]::GetEnvironmentVariable("Path", "Machine")
+}
+
+# Re-discover uv without re-installing it.  Cross-process stage drivers
+# (the desktop GUI's onboarding wizard, CI step-runners) invoke each stage
+# in a fresh powershell process, so $script:UvCmd set by Install-Uv in a
+# prior process is not visible here.  Later stages (Test-Python,
+# Install-Venv, Install-Dependencies, Install-PlatformSdks) call this
+# at the top to populate $script:UvCmd from PATH or known install paths.
+# Throws if uv is not findable -- the caller's stage then surfaces a
+# clean error via the stage-driver's try/catch.  Fast path is a single
+# Get-Command call when uv is on PATH (the common case after Stage-Uv
+# ran path-modifying installs in a sibling process).
+function Resolve-UvCmd {
+    # Already resolved (default invocation path: Install-Uv ran earlier
+    # in the same process and set $script:UvCmd).
+    if ($script:UvCmd) {
+        if ($script:UvCmd -eq "uv") {
+            # "uv" on PATH -- verify it's still resolvable (PATH could have
+            # changed mid-session; cheap to recheck).
+            if (Get-Command uv -ErrorAction SilentlyContinue) { return }
+        } elseif (Test-Path $script:UvCmd) {
+            return
+        }
+        # Stale; fall through to re-discover.
+    }
+
+    # Try PATH first (covers `winget install astral.uv`, manual installs,
+    # and the post-Install-Uv state where uv.exe lives in
+    # %USERPROFILE%\.local\bin which the installer added to PATH).
+    if (Get-Command uv -ErrorAction SilentlyContinue) {
+        $script:UvCmd = "uv"
+        return
+    }
+
+    # Refresh PATH from registry in case the current process started before
+    # Install-Uv updated User PATH.
+    $env:Path = [Environment]::GetEnvironmentVariable("Path", "User") + ";" + [Environment]::GetEnvironmentVariable("Path", "Machine")
+    if (Get-Command uv -ErrorAction SilentlyContinue) {
+        $script:UvCmd = "uv"
+        return
+    }
+
+    # Check the well-known install locations the astral.sh installer drops
+    # uv into.  Mirrors the probe order Install-Uv uses.
+    foreach ($uvPath in @("$env:USERPROFILE\.local\bin\uv.exe", "$env:USERPROFILE\.cargo\bin\uv.exe")) {
+        if (Test-Path $uvPath) {
+            $script:UvCmd = $uvPath
+            return
+        }
+    }
+
+    throw "uv is not installed or not on PATH. Run install.ps1 -Stage uv first."
+}
+
 function Test-Python {
     Write-Info "Checking Python $PythonVersion..."
     
@@ -142,7 +265,7 @@ function Test-Python {
         }
     } catch { }
     
-    # Python not found — use uv to install it (no admin needed!)
+    # Python not found -- use uv to install it (no admin needed!)
     Write-Info "Python $PythonVersion not found, installing via uv..."
     try {
         # Temporarily relax ErrorActionPreference: uv writes download progress
@@ -150,7 +273,7 @@ function Test-Python {
         # stderr.  With $ErrorActionPreference = "Stop" (set at the top of this
         # script) PowerShell wraps stderr lines from native commands as
         # ErrorRecord objects when captured via 2>&1, then throws a terminating
-        # exception on the first one — even though uv exits 0 and Python was
+        # exception on the first one -- even though uv exits 0 and Python was
         # installed successfully.  Verify success via `uv python find`
         # afterwards, which is the reliable signal regardless of exit-code
         # semantics or stderr noise.  This fix was previously landed as
@@ -170,7 +293,7 @@ function Test-Python {
             return $true
         }
 
-        # uv ran but Python still not findable — show what happened
+        # uv ran but Python still not findable -- show what happened
         if ($uvExitCode -ne 0) {
             Write-Warn "uv python install output:"
             Write-Host $uvOutput -ForegroundColor DarkGray
@@ -195,7 +318,7 @@ function Test-Python {
         } catch { }
     }
 
-    # Fallback: try system python — but skip the Microsoft Store stub.
+    # Fallback: try system python -- but skip the Microsoft Store stub.
     # On Windows, %LOCALAPPDATA%\Microsoft\WindowsApps\python.exe is a 0-byte
     # reparse-point stub that prints "Python was not found; run without
     # arguments to install from the Microsoft Store..." to stdout and exits
@@ -244,17 +367,17 @@ function Install-Git {
     Ensure Git (and Git Bash) are installed.  Git for Windows bundles bash.exe
     which Hermes uses to run shell commands.
 
-    Priority order (deliberately simple — no winget, no registry, no system
+    Priority order (deliberately simple -- no winget, no registry, no system
     package manager):
-      1. Existing ``git`` on PATH — use it as-is (the common fast path).
+      1. Existing ``git`` on PATH -- use it as-is (the common fast path).
       2. Download **PortableGit** from the official git-for-windows GitHub
          release (self-extracting 7z.exe) and unpack it to
-         ``%LOCALAPPDATA%\hermes\git`` — never touches system Git, never
+         ``%LOCALAPPDATA%\hermes\git`` -- never touches system Git, never
          requires admin, works even on locked-down machines and machines
          with a broken system Git install.
 
     **Why PortableGit, not MinGit:**  MinGit is the minimal-automation
-    distribution and ships ONLY ``git.exe`` — no bash, no POSIX utilities.
+    distribution and ships ONLY ``git.exe`` -- no bash, no POSIX utilities.
     Hermes needs ``bash.exe`` to run shell commands.  PortableGit is the
     full Git for Windows distribution without the installer UI; it ships
     ``git.exe`` + ``bash.exe`` + ``sh``, ``awk``, ``sed``, ``grep``, ``curl``,
@@ -280,9 +403,9 @@ function Install-Git {
     }
 
     # Download PortableGit into $HermesHome\git.  Always works as long as
-    # we can reach github.com — no admin, no winget, no reliance on the
+    # we can reach github.com -- no admin, no winget, no reliance on the
     # user's possibly-broken system Git install.
-    Write-Info "Git not found — downloading PortableGit to $HermesHome\git\ ..."
+    Write-Info "Git not found -- downloading PortableGit to $HermesHome\git\ ..."
     Write-Info "(no admin rights required; isolated from any system Git install)"
 
     try {
@@ -294,7 +417,7 @@ function Install-Git {
                 "64-bit"
             }
         } else {
-            # PortableGit does not ship a 32-bit build — fall back to MinGit 32-bit
+            # PortableGit does not ship a 32-bit build -- fall back to MinGit 32-bit
             # with a warning that bash-based features will be unavailable.
             "32-bit-mingit"
         }
@@ -303,7 +426,7 @@ function Install-Git {
         $release = Invoke-RestMethod -Uri $releaseApi -UseBasicParsing -Headers @{ "User-Agent" = "hermes-installer" }
 
         if ($arch -eq "32-bit-mingit") {
-            Write-Warn "32-bit Windows detected — PortableGit is 64-bit only.  Installing MinGit 32-bit as a last resort; bash-dependent Hermes features (terminal tool, agent-browser) will not work on this machine."
+            Write-Warn "32-bit Windows detected -- PortableGit is 64-bit only.  Installing MinGit 32-bit as a last resort; bash-dependent Hermes features (terminal tool, agent-browser) will not work on this machine."
             $assetPattern = "MinGit-*-32-bit.zip"
             $downloadIsZip = $true
         } elseif ($arch -eq "arm64") {
@@ -428,7 +551,7 @@ function Set-GitBashEnvVar {
 
     # Standard system install locations as a final fallback.  Note:
     # ProgramFiles(x86) can't be referenced via ${env:...} string interpolation
-    # because of the parens — use [Environment]::GetEnvironmentVariable().
+    # because of the parens -- use [Environment]::GetEnvironmentVariable().
     $candidates += "${env:ProgramFiles}\Git\bin\bash.exe"
     $pf86 = [Environment]::GetEnvironmentVariable("ProgramFiles(x86)")
     if ($pf86) { $candidates += "$pf86\Git\bin\bash.exe" }
@@ -443,7 +566,7 @@ function Set-GitBashEnvVar {
         }
     }
 
-    Write-Warn "Could not locate bash.exe — Hermes may not find Git Bash."
+    Write-Warn "Could not locate bash.exe -- Hermes may not find Git Bash."
     Write-Info "If needed, set HERMES_GIT_BASH_PATH manually to your bash.exe path."
 }
 
@@ -467,26 +590,18 @@ function Test-Node {
         return $true
     }
 
-    Write-Info "Node.js not found — installing Node.js $NodeVersion LTS..."
+    Write-Info "Node.js not found -- installing Node.js $NodeVersion LTS..."
 
-    # Try winget first (cleanest on modern Windows)
-    if (Get-Command winget -ErrorAction SilentlyContinue) {
-        Write-Info "Installing via winget..."
-        try {
-            winget install OpenJS.NodeJS.LTS --silent --accept-package-agreements --accept-source-agreements 2>&1 | Out-Null
-            # Refresh PATH
-            $env:Path = [Environment]::GetEnvironmentVariable("Path", "User") + ";" + [Environment]::GetEnvironmentVariable("Path", "Machine")
-            if (Get-Command node -ErrorAction SilentlyContinue) {
-                $version = node --version
-                Write-Success "Node.js $version installed via winget"
-                $script:HasNode = $true
-                return $true
-            }
-        } catch { }
-    }
-
-    # Fallback: download binary zip to ~/.hermes/node/
-    Write-Info "Downloading Node.js $NodeVersion binary..."
+    # Try the portable-zip path FIRST -- no UAC, no admin, no winget MSI.
+    # winget install OpenJS.NodeJS.LTS triggers a system-wide MSI install
+    # which prompts UAC (the dialog often appears minimized in the taskbar
+    # and the install silently waits for consent, looking like a hang).
+    # The portable zip path drops node.exe + npm into $HermesHome\node\
+    # which is user-scoped and identical to how Install-Git handles
+    # PortableGit.  Same UX guarantee: works on locked-down enterprise
+    # machines with no admin rights.
+    Write-Info "Downloading portable Node.js $NodeVersion to $HermesHome\node\ ..."
+    Write-Info "(no admin rights required; isolated from any system Node install)"
     try {
         $arch = if ([Environment]::Is64BitOperatingSystem) { "x64" } else { "x86" }
         $indexUrl = "https://nodejs.org/dist/latest-v${NodeVersion}.x/"
@@ -506,10 +621,23 @@ function Test-Node {
             if ($extractedDir) {
                 if (Test-Path "$HermesHome\node") { Remove-Item -Recurse -Force "$HermesHome\node" }
                 Move-Item $extractedDir.FullName "$HermesHome\node"
+
+                # Session PATH so the rest of this run sees node/npm.
                 $env:Path = "$HermesHome\node;$env:Path"
 
+                # Persist to User PATH so fresh shells (and future stages
+                # in cross-process driver mode) see it.  Matches the
+                # pattern Install-Git uses for PortableGit.
+                $nodeDir = "$HermesHome\node"
+                $userPath = [Environment]::GetEnvironmentVariable("Path", "User")
+                $userPathItems = if ($userPath) { $userPath -split ";" } else { @() }
+                if ($userPathItems -notcontains $nodeDir) {
+                    $userPathItems += $nodeDir
+                    [Environment]::SetEnvironmentVariable("Path", ($userPathItems -join ";"), "User")
+                }
+
                 $version = & "$HermesHome\node\node.exe" --version
-                Write-Success "Node.js $version installed to ~/.hermes/node/"
+                Write-Success "Node.js $version installed to $HermesHome\node\ (portable, user-scoped)"
                 $script:HasNode = $true
 
                 Remove-Item -Force $tmpZip -ErrorAction SilentlyContinue
@@ -518,10 +646,39 @@ function Test-Node {
             }
         }
     } catch {
-        Write-Warn "Download failed: $_"
+        Write-Warn "Portable Node.js download failed: $_"
     }
 
-    Write-Warn "Could not auto-install Node.js"
+    # Fallback: try winget (used to be primary, demoted because the MSI
+    # install triggers a UAC prompt that frequently appears minimized in
+    # the taskbar -- looks like a hang to users on stock Windows).
+    # Kept for environments where the portable download fails (proxy,
+    # locked firewall, etc.) but the user is willing to consent to UAC.
+    if (Get-Command winget -ErrorAction SilentlyContinue) {
+        Write-Info "Falling back to winget (may prompt UAC -- check your taskbar for a flashing icon)..."
+        try {
+            # Relax EAP=Stop so stderr lines from winget don't get wrapped
+            # as ErrorRecords and short-circuit the 2>&1 pipe before we can
+            # check the post-condition.  See the long comment in Install-Uv
+            # for the same pattern.
+            $prevEAP = $ErrorActionPreference
+            $ErrorActionPreference = "Continue"
+            winget install OpenJS.NodeJS.LTS --silent --accept-package-agreements --accept-source-agreements 2>&1 | Out-Null
+            $ErrorActionPreference = $prevEAP
+            # Refresh PATH
+            $env:Path = [Environment]::GetEnvironmentVariable("Path", "User") + ";" + [Environment]::GetEnvironmentVariable("Path", "Machine")
+            if (Get-Command node -ErrorAction SilentlyContinue) {
+                $version = node --version
+                Write-Success "Node.js $version installed via winget"
+                $script:HasNode = $true
+                return $true
+            }
+        } catch {
+            if ($prevEAP) { $ErrorActionPreference = $prevEAP }
+        }
+    }
+
+
     Write-Info "Install manually: https://nodejs.org/en/download/"
     $script:HasNode = $false
     return $true
@@ -657,7 +814,7 @@ function Install-Repository {
 
     if (Test-Path $InstallDir) {
         # Test-Path "$InstallDir\.git" returns True when .git is a file OR a
-        # directory OR a symlink OR a submodule-style gitfile — and also when
+        # directory OR a symlink OR a submodule-style gitfile -- and also when
         # it's a broken stub left over from a failed previous install (e.g.
         # a partial Remove-Item that couldn't delete a locked index.lock).
         # Validate the repo properly by asking git itself.  Two checks
@@ -704,7 +861,7 @@ function Install-Repository {
             # a partial uninstall used to lock the installer into the
             # "update" branch forever, emitting three ``fatal: not a git
             # repository`` errors and failing with "not in a git directory".
-            Write-Warn "Existing directory at $InstallDir is not a valid git repo — replacing it."
+            Write-Warn "Existing directory at $InstallDir is not a valid git repo -- replacing it."
             try {
                 Remove-Item -Recurse -Force $InstallDir -ErrorAction Stop
             } catch {
@@ -750,7 +907,7 @@ function Install-Repository {
         # Fallback: download ZIP archive (bypasses git file I/O issues entirely)
         if (-not $cloneSuccess) {
             if (Test-Path $InstallDir) { Remove-Item -Recurse -Force $InstallDir -ErrorAction SilentlyContinue }
-            Write-Warn "Git clone failed — downloading ZIP archive instead..."
+            Write-Warn "Git clone failed -- downloading ZIP archive instead..."
             try {
                 $zipUrl = "https://github.com/NousResearch/hermes-agent/archive/refs/heads/$Branch.zip"
                 $zipPath = "$env:TEMP\hermes-agent-$Branch.zip"
@@ -841,14 +998,14 @@ function Install-Dependencies {
         $env:VIRTUAL_ENV = "$InstallDir\venv"
     }
 
-    # Hash-verified install (Tier 0) — when uv.lock is present, prefer
+    # Hash-verified install (Tier 0) -- when uv.lock is present, prefer
     # `uv sync --locked`. The lockfile records SHA256 hashes for every
     # transitive dependency, so a compromised transitive (different hash
     # than what we shipped) is REJECTED by the resolver. This is the
     # *only* path that protects against the "direct dep is fine, but the
     # dep's dep got worm-poisoned overnight" failure mode. The
     # `uv pip install` tiers below re-resolve transitives fresh from PyPI
-    # without any hash verification — they exist to keep installs working
+    # without any hash verification -- they exist to keep installs working
     # when the lockfile is stale, missing, or out-of-sync with the
     # current extras spec, NOT because they're equivalent in posture.
     if (Test-Path "uv.lock") {
@@ -863,7 +1020,7 @@ function Install-Dependencies {
         #
         # UV_PROJECT_ENVIRONMENT pins the sync target to our venv\.
         # Without it, modern uv (>=0.5) ignores VIRTUAL_ENV for `sync`
-        # and creates a sibling .venv\ inside the repo — leaving venv\
+        # and creates a sibling .venv\ inside the repo -- leaving venv\
         # empty and producing the broken state where `hermes.exe` exists
         # in the wrong directory and imports fail with ModuleNotFoundError.
         # (Mirrors the same flag in scripts/install.sh::install_deps.)
@@ -872,7 +1029,7 @@ function Install-Dependencies {
         if ($LASTEXITCODE -eq 0) {
             Write-Success "Main package installed (hash-verified via uv.lock)"
             $script:InstalledTier = "hash-verified (uv.lock)"
-            # Skip the rest of the tiered cascade — we already have a
+            # Skip the rest of the tiered cascade -- we already have a
             # complete, hash-verified install.
             $skipPipFallback = $true
         } else {
@@ -880,22 +1037,22 @@ function Install-Dependencies {
             $skipPipFallback = $false
         }
     } else {
-        Write-Info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
+        Write-Info "uv.lock not found -- falling back to PyPI resolve (no hash verification)"
         $skipPipFallback = $false
     }
 
     # Install main package.  Tiered fallback so a single flaky transitive
     # doesn't silently drop everything.  Each tier's stdout/stderr is
-    # preserved — no Out-Null swallowing — so the user can see what failed.
+    # preserved -- no Out-Null swallowing -- so the user can see what failed.
     #
-    # Tier 1: [all] — the curated extra in pyproject.toml.
+    # Tier 1: [all] -- the curated extra in pyproject.toml.
     # Tier 2: [all] minus the currently-broken extras list ($brokenExtras).
     #         Edit $brokenExtras below when something on PyPI breaks; this
     #         lets users keep the rest of [all] when one transitive is
     #         unavailable. The list of [all]'s contents is parsed from
-    #         pyproject.toml at runtime — there is NO hand-mirrored copy
+    #         pyproject.toml at runtime -- there is NO hand-mirrored copy
     #         to drift out of sync.
-    # Tier 3: bare `.` — last-resort so at least the core CLI launches.
+    # Tier 3: bare `.` -- last-resort so at least the core CLI launches.
 
     # Currently-broken extras. Edit this list when an upstream package
     # gets quarantined / yanked / breaks resolution. Empty means everything
@@ -969,11 +1126,21 @@ except Exception:
         if (-not (Test-Path $venvPython)) {
             throw "Install reported success but $venvPython does not exist. The dependency sync likely landed in a sibling .venv\ directory. Re-run the installer; if it persists, manually: cd '$InstallDir'; Remove-Item -Recurse -Force venv,.venv; uv venv venv --python $PythonVersion; `$env:UV_PROJECT_ENVIRONMENT='$InstallDir\venv'; uv sync --extra all --locked"
         }
+        # Relax EAP=Stop while running the import probe.  Python writes
+        # deprecation warnings and import-system info to stderr; under
+        # EAP=Stop the 2>&1 merge wraps those as ErrorRecord objects and
+        # throws even when the imports succeed.  $LASTEXITCODE is the
+        # reliable signal (it's 0 iff the python invocation exited 0,
+        # regardless of what was written to stderr).
+        $prevEAP = $ErrorActionPreference
+        $ErrorActionPreference = "Continue"
         & $venvPython -c "import dotenv, openai, rich, prompt_toolkit" 2>&1 | Out-Null
-        if ($LASTEXITCODE -ne 0) {
+        $importExitCode = $LASTEXITCODE
+        $ErrorActionPreference = $prevEAP
+        if ($importExitCode -ne 0) {
             $sibling = "$InstallDir\.venv"
             $hint = if (Test-Path $sibling) {
-                "Detected sibling .venv\ at $sibling — uv synced there instead of venv\. Recover with: cd '$InstallDir'; Remove-Item -Recurse -Force venv; Move-Item .venv venv"
+                "Detected sibling .venv\ at $sibling -- uv synced there instead of venv\. Recover with: cd '$InstallDir'; Remove-Item -Recurse -Force venv; Move-Item .venv venv"
             } else {
                 "Recover with: cd '$InstallDir'; `$env:UV_PROJECT_ENVIRONMENT='$InstallDir\venv'; uv sync --extra all --locked"
             }
@@ -982,19 +1149,27 @@ except Exception:
         Write-Success "Baseline imports verified in venv"
     }
 
-    # Verify the dashboard deps specifically — they're the most common thing
+    # Verify the dashboard deps specifically -- they're the most common thing
     # users hit and lazy-import errors from `hermes dashboard` are confusing.
     # If tier 1 failed (the common case), [web] was still picked up by tiers
     # 2-3; only tier 4 leaves you without it.
     $pythonExe = if (-not $NoVenv) { "$InstallDir\venv\Scripts\python.exe" } else { (& $UvCmd python find $PythonVersion) }
     if (Test-Path $pythonExe) {
         $webOk = $false
+        # Relax EAP=Stop while running the import probe; see the matching
+        # comment on the baseline-imports check above.  Python writes
+        # deprecation warnings to stderr and we don't want those wrapped
+        # as ErrorRecords that silently force the "not importable" path
+        # even when fastapi/uvicorn are actually installed.
+        $prevEAP = $ErrorActionPreference
+        $ErrorActionPreference = "Continue"
         try {
             & $pythonExe -c "import fastapi, uvicorn" 2>&1 | Out-Null
             if ($LASTEXITCODE -eq 0) { $webOk = $true }
         } catch { }
+        $ErrorActionPreference = $prevEAP
         if (-not $webOk) {
-            Write-Warn "fastapi/uvicorn not importable — `hermes dashboard` will not work."
+            Write-Warn "fastapi/uvicorn not importable -- `hermes dashboard` will not work."
             Write-Info "Attempting targeted install of [web] extra as last resort..."
             & $UvCmd pip install -e ".[web]"
             if ($LASTEXITCODE -eq 0) {
@@ -1099,7 +1274,7 @@ function Copy-ConfigTemplates {
     # flags the BOM as an invisible unicode character and refuses to
     # load the file.  PS7's ``-Encoding utf8NoBOM`` fixes that but we
     # don't control which PowerShell version the user has.  Go direct
-    # to .NET with an explicit UTF8Encoding($false) — BOM-free on every
+    # to .NET with an explicit UTF8Encoding($false) -- BOM-free on every
     # PowerShell version.
     $soulPath = "$HermesHome\SOUL.md"
     if (-not (Test-Path $soulPath)) {
@@ -1155,7 +1330,7 @@ function Install-NodeDeps {
     # Resolve npm explicitly to npm.cmd, NOT npm.ps1.  Node.js on Windows
     # ships BOTH npm.cmd (a batch shim) and npm.ps1 (a PowerShell shim).
     # Get-Command's default ordering picks whichever comes first in PATHEXT,
-    # and on many systems that's .ps1 — but .ps1 requires scripts to be
+    # and on many systems that's .ps1 -- but .ps1 requires scripts to be
     # enabled in PowerShell's execution policy, which most Windows users
     # don't have (the Restricted / RemoteSigned default blocks unsigned
     # .ps1 files).  .cmd has no such restriction and works on every box.
@@ -1165,7 +1340,7 @@ function Install-NodeDeps {
     # returned if we can't find a .cmd sibling.
     $npmCmd = Get-Command npm -ErrorAction SilentlyContinue
     if (-not $npmCmd) {
-        Write-Warn "npm not found on PATH — skipping Node.js dependencies."
+        Write-Warn "npm not found on PATH -- skipping Node.js dependencies."
         Write-Info "Open a new PowerShell window and re-run 'hermes setup tools' later."
         return
     }
@@ -1176,7 +1351,7 @@ function Install-NodeDeps {
             Write-Info "Using npm.cmd (PowerShell execution policy blocks npm.ps1)"
             $npmExe = $npmCmdSibling
         } else {
-            Write-Warn "Only npm.ps1 available — install may fail if script execution is disabled."
+            Write-Warn "Only npm.ps1 available -- install may fail if script execution is disabled."
             Write-Info "  If it fails, either enable PS script execution or install Node via winget."
         }
     }
@@ -1193,17 +1368,40 @@ function Install-NodeDeps {
     function _Run-NpmInstall([string]$label, [string]$installDir, [string]$logPath, [string]$npmPath) {
         Push-Location $installDir
         try {
-            # Redirect ALL output streams to the log file via 2>&1 and then
-            # ``Tee-Object`` / ``Out-File``.  Simpler approach: call npm
-            # with output redirected and inspect $LASTEXITCODE afterwards.
-            & $npmPath install --silent *> $logPath
+            # Stream npm's output to BOTH the console and the log file via
+            # Tee-Object.  Previously this called ``& npm install --silent
+            # *> $logPath`` which redirected every stream to disk and left
+            # the user staring at a frozen "Installing..." line for the
+            # duration of the install.  On a fresh VM that's 1-3 minutes
+            # of total silence, indistinguishable from a hang.
+            #
+            # Tee writes the live output to stdout AND $logPath; we still
+            # capture the exit code afterwards and surface diagnostics
+            # on failure.  Note: 2>&1 merges npm's stderr into the success
+            # stream first because Tee-Object only sees the success
+            # stream of the pipeline.  ForEach-Object { "$_" } coerces
+            # each item to a string so PowerShell's NativeCommandError
+            # formatter doesn't wrap stderr lines as alarming red blocks
+            # (cosmetic polish; the underlying text is unchanged).
+            #
+            # Relax EAP around the npm invocation: with EAP=Stop (set at
+            # the top of this script), PowerShell wraps stderr lines from
+            # native commands captured via 2>&1 as ErrorRecord objects and
+            # throws on the first one -- even though npm exited 0.  This
+            # is the same issue Test-Python and Install-Uv work around
+            # for uv's stderr-emitting installer.  Check success via
+            # $LASTEXITCODE, which is reliable regardless of stderr noise.
+            $prevEAP = $ErrorActionPreference
+            $ErrorActionPreference = "Continue"
+            & $npmPath install --silent 2>&1 | ForEach-Object { "$_" } | Tee-Object -FilePath $logPath
             $code = $LASTEXITCODE
+            $ErrorActionPreference = $prevEAP
             if ($code -eq 0) {
                 Write-Success "$label dependencies installed"
                 Remove-Item -Force $logPath -ErrorAction SilentlyContinue
                 return $true
             }
-            Write-Warn "$label npm install failed — exit code $code"
+            Write-Warn "$label npm install failed -- exit code $code"
             if (Test-Path $logPath) {
                 $errText = (Get-Content $logPath -Raw -ErrorAction SilentlyContinue)
                 if ($errText) {
@@ -1218,6 +1416,7 @@ function Install-NodeDeps {
             Write-Info "Run manually later: cd `"$installDir`"; npm install"
             return $false
         } catch {
+            if ($prevEAP) { $ErrorActionPreference = $prevEAP }
             Write-Warn "$label npm install could not be launched: $_"
             return $false
         } finally {
@@ -1236,7 +1435,7 @@ function Install-NodeDeps {
         # returns False (no Chromium under %LOCALAPPDATA%\ms-playwright), and the
         # browser_* tools are silently filtered out of the agent's tool schema.
         # System Chrome at "C:\Program Files\Google\Chrome\..." is NOT used by
-        # agent-browser — it expects a Playwright-managed Chromium.
+        # agent-browser -- it expects a Playwright-managed Chromium.
         if ($browserNpmOk) {
             Write-Info "Installing browser engine (Playwright Chromium)..."
             # npx lives next to npm in the same bin dir.  Prefer .cmd to dodge
@@ -1252,19 +1451,54 @@ function Install-NodeDeps {
                 if ($npxCmd) { $npxExe = $npxCmd.Source }
             }
             if (-not $npxExe) {
-                Write-Warn "npx not found — cannot install Playwright Chromium."
+                Write-Warn "npx not found -- cannot install Playwright Chromium."
                 Write-Info "Run manually later: cd `"$InstallDir`"; npx playwright install chromium"
             } else {
                 $pwLog = "$env:TEMP\hermes-playwright-install-$(Get-Random).log"
                 Push-Location $InstallDir
                 try {
-                    & $npxExe playwright install chromium *> $pwLog
+                    # Playwright Chromium is ~170MB compressed and the
+                    # download regularly takes 3-10 minutes on a fresh
+                    # VM.  Tee the output to console + log so the user
+                    # sees download progress in real time instead of
+                    # staring at a silent prompt that looks hung.  See
+                    # _Run-NpmInstall above for the same pattern and
+                    # the rationale behind 2>&1 before the pipe.
+                    Write-Info "(this can take several minutes -- streaming progress below)"
+                    # --yes auto-accepts npx's "Need to install playwright@X.Y.Z"
+                    # confirmation prompt.  Without it, npx 7+ blocks on stdin
+                    # waiting for a y/N answer that never comes when this is
+                    # invoked through a pipeline (Tee-Object disconnects stdin
+                    # from the user's TTY), and the install hangs indefinitely
+                    # after printing "Need to install the following packages:
+                    # playwright@X.Y.Z".
+                    #
+                    # Relax EAP around the playwright invocation: playwright
+                    # emits a "Chromium downloaded to ..." success banner to
+                    # stderr after a successful install.  Under EAP=Stop, the
+                    # 2>&1 merge wraps those stderr lines as ErrorRecord
+                    # objects and throws -- causing this catch block to fire
+                    # with a mangled banner as the error message even though
+                    # the install actually succeeded.  Check $LASTEXITCODE
+                    # instead, which is the reliable signal.
+                    #
+                    # The ForEach-Object { "$_" } coercion BEFORE Tee-Object
+                    # is a cosmetic polish: with bare 2>&1, PowerShell still
+                    # renders stderr lines through its NativeCommandError
+                    # formatter (the red "npx.cmd : ..." block).  Coercing
+                    # each pipeline item to a string strips that wrapper so
+                    # the user sees clean playwright output instead of the
+                    # alarming-looking error formatting.
+                    $prevEAP = $ErrorActionPreference
+                    $ErrorActionPreference = "Continue"
+                    & $npxExe --yes playwright install chromium 2>&1 | ForEach-Object { "$_" } | Tee-Object -FilePath $pwLog
                     $pwCode = $LASTEXITCODE
+                    $ErrorActionPreference = $prevEAP
                     if ($pwCode -eq 0) {
                         Write-Success "Playwright Chromium installed (browser tools ready)"
                         Remove-Item -Force $pwLog -ErrorAction SilentlyContinue
                     } else {
-                        Write-Warn "Playwright Chromium install failed — exit code $pwCode"
+                        Write-Warn "Playwright Chromium install failed -- exit code $pwCode"
                         Write-Warn "Browser tools will not work until Chromium is installed."
                         if (Test-Path $pwLog) {
                             $pwErr = Get-Content $pwLog -Raw -ErrorAction SilentlyContinue
@@ -1280,6 +1514,7 @@ function Install-NodeDeps {
                         Write-Info "Run manually later: cd `"$InstallDir`"; npx playwright install chromium"
                     }
                 } catch {
+                    if ($prevEAP) { $ErrorActionPreference = $prevEAP }
                     Write-Warn "Playwright Chromium install could not be launched: $_"
                     Write-Info "Run manually later: cd `"$InstallDir`"; npx playwright install chromium"
                 } finally {
@@ -1307,7 +1542,7 @@ function Install-PlatformSdks {
     #    which silently skips some messaging SDKs from [messaging].
     # 2. `uv` creates the venv without pip.  If a messaging SDK ends up
     #    missing, the user can't `pip install python-telegram-bot` to
-    #    recover — pip simply isn't in their venv.
+    #    recover -- pip simply isn't in their venv.
     #
     # Strategy: bootstrap pip via `python -m ensurepip` (idempotent), then
     # for each token set in .env, verify the matching SDK imports.  If not,
@@ -1387,7 +1622,7 @@ function Install-PlatformSdks {
             Write-Info "Bootstrapping pip into venv (uv doesn't ship pip)..."
             & $pythonExe -m ensurepip --upgrade 2>&1 | Out-Null
             if ($LASTEXITCODE -ne 0) {
-                Write-Warn "ensurepip failed — can't auto-install missing SDKs."
+                Write-Warn "ensurepip failed -- can't auto-install missing SDKs."
                 Write-Info "Manual recovery: $UvCmd pip install `"$($missing[0].Spec)`""
                 return
             }
@@ -1412,20 +1647,28 @@ function Invoke-SetupWizard {
         Write-Info "Skipping setup wizard (-SkipSetup)"
         return
     }
-    
+
+    if ($NonInteractive) {
+        # The setup wizard prompts for API keys, model choice, persona, etc.
+        # Non-interactive callers (GUI installer) own that UX themselves; let
+        # them drive it after install.ps1 returns.
+        Write-Info "Skipping setup wizard (non-interactive). Configure via the GUI or 'hermes setup'."
+        return
+    }
+
     Write-Host ""
     Write-Info "Starting setup wizard..."
     Write-Host ""
-    
+
     Push-Location $InstallDir
-    
+
     # Run hermes setup using the venv Python directly (no activation needed)
     if (-not $NoVenv) {
         & ".\venv\Scripts\python.exe" -m hermes_cli.main setup
     } else {
         python -m hermes_cli.main setup
     }
-    
+
     Pop-Location
 }
 
@@ -1455,13 +1698,20 @@ function Start-GatewayIfConfigured {
         Write-Info "WhatsApp is enabled but not yet paired."
         Write-Info "Running 'hermes whatsapp' to pair via QR code..."
         Write-Host ""
-        $response = Read-Host "Pair WhatsApp now? [Y/n]"
-        if ($response -eq "" -or $response -match "^[Yy]") {
-            try {
-                & $hermesCmd whatsapp
-            } catch {
-                # Expected after pairing completes
+        # Non-interactive callers (GUI installer, CI) skip the QR-pair prompt;
+        # WhatsApp pairing requires a human looking at a phone camera, so the
+        # downstream UI is responsible for surfacing this when it makes sense.
+        if (-not $NonInteractive) {
+            $response = Read-Host "Pair WhatsApp now? [Y/n]"
+            if ($response -eq "" -or $response -match "^[Yy]") {
+                try {
+                    & $hermesCmd whatsapp
+                } catch {
+                    # Expected after pairing completes
+                }
             }
+        } else {
+            Write-Info "Skipping WhatsApp pairing prompt (non-interactive)."
         }
     }
 
@@ -1469,6 +1719,16 @@ function Start-GatewayIfConfigured {
     Write-Info "Messaging platform token detected!"
     Write-Info "The gateway handles messaging platforms and cron job execution."
     Write-Host ""
+
+    # In non-interactive mode the gateway lifecycle is the caller's problem
+    # (the GUI manages its own gateway process, CI doesn't want background
+    # services on the build agent, etc.).  Treat it like the user declined.
+    if ($NonInteractive) {
+        Write-Info "Skipping gateway autostart prompt (non-interactive)."
+        Write-Info "Start the gateway later with: hermes gateway"
+        return
+    }
+
     $response = Read-Host "Would you like to start the gateway now? [Y/n]"
 
     if ($response -eq "" -or $response -match "^[Yy]") {
@@ -1492,13 +1752,13 @@ function Start-GatewayIfConfigured {
 
 function Write-Completion {
     Write-Host ""
-    Write-Host "┌─────────────────────────────────────────────────────────┐" -ForegroundColor Green
-    Write-Host "│              ✓ Installation Complete!                   │" -ForegroundColor Green
-    Write-Host "└─────────────────────────────────────────────────────────┘" -ForegroundColor Green
+    Write-Host "+---------------------------------------------------------+" -ForegroundColor Green
+    Write-Host "|              [OK] Installation Complete!                   |" -ForegroundColor Green
+    Write-Host "+---------------------------------------------------------+" -ForegroundColor Green
     Write-Host ""
     
     # Show file locations
-    Write-Host "📁 Your files:" -ForegroundColor Cyan
+    Write-Host "* Your files:" -ForegroundColor Cyan
     Write-Host ""
     Write-Host "   Config:    " -NoNewline -ForegroundColor Yellow
     Write-Host "$HermesHome\config.yaml"
@@ -1510,9 +1770,9 @@ function Write-Completion {
     Write-Host "$HermesHome\hermes-agent\"
     Write-Host ""
     
-    Write-Host "─────────────────────────────────────────────────────────" -ForegroundColor Cyan
+    Write-Host "---------------------------------------------------------" -ForegroundColor Cyan
     Write-Host ""
-    Write-Host "🚀 Commands:" -ForegroundColor Cyan
+    Write-Host "* Commands:" -ForegroundColor Cyan
     Write-Host ""
     Write-Host "   hermes              " -NoNewline -ForegroundColor Green
     Write-Host "Start chatting"
@@ -1528,9 +1788,9 @@ function Write-Completion {
     Write-Host "Update to latest version"
     Write-Host ""
     
-    Write-Host "─────────────────────────────────────────────────────────" -ForegroundColor Cyan
+    Write-Host "---------------------------------------------------------" -ForegroundColor Cyan
     Write-Host ""
-    Write-Host "⚡ Restart your terminal for PATH changes to take effect" -ForegroundColor Yellow
+    Write-Host "[*] Restart your terminal for PATH changes to take effect" -ForegroundColor Yellow
     Write-Host ""
     
     if (-not $HasNode) {
@@ -1548,18 +1808,136 @@ function Write-Completion {
 }
 
 # ============================================================================
-# Main
+# Stage protocol
+# ============================================================================
+#
+# install.ps1 supports a small, stable "stage protocol" that lets programmatic
+# callers (the desktop GUI's onboarding wizard, CI, future install.sh, etc.)
+# drive the install one step at a time and surface progress/errors with their
+# own UI.  CLI users running the canonical `irm | iex` one-liner never
+# encounter this -- default invocation behaves exactly as before.
+#
+# Entry points:
+#
+#   install.ps1                       Interactive install (today's behavior).
+#   install.ps1 -ProtocolVersion      Emit the protocol version integer.
+#   install.ps1 -Manifest             Emit the stage manifest as JSON.
+#   install.ps1 -Stage <name>         Run one stage and emit its result.
+#   install.ps1 -NonInteractive       Disable all Read-Host prompts (also
+#                                     skips the setup wizard and the gateway
+#                                     autostart prompt).  Can be combined
+#                                     with default invocation to do a full
+#                                     non-interactive install.
+#   install.ps1 -Json                 Emit machine-readable JSON instead of
+#                                     the human-readable success banner at
+#                                     the end of a full install.
+#
+# Manifest schema (the JSON returned by -Manifest):
+#
+#   {
+#     "protocol_version": 1,
+#     "stages": [
+#       {
+#         "name": "uv",
+#         "title": "Installing uv package manager",
+#         "category": "prereqs",
+#         "needs_user_input": false
+#       },
+#       ...
+#     ]
+#   }
+#
+# Stage result (the JSON written by -Stage <name>):
+#
+#   {
+#     "stage": "uv",
+#     "ok": true,
+#     "skipped": false,
+#     "reason": null,
+#     "duration_ms": 1234
+#   }
+#
+# Exit codes:
+#
+#   0 -- success (stage ran, or stage was deliberately skipped).
+#   1 -- generic failure; the stage threw.
+#   2 -- unknown stage name passed to -Stage.
+#
+# Adding a stage:
+#
+#   1. Append an entry to $InstallStages below.
+#   2. Make sure the worker function it points at is idempotent and respects
+#      $NonInteractive when it has prompts.  Add it before "configure"
+#      (the wizard) or "gateway" (autostart) if it should run unconditionally;
+#      after those if it's optional post-install glue.
+#   3. Do NOT bump $InstallStageProtocolVersion -- adding stages is additive.
+#      Drivers iterate the manifest dynamically.
+#
 # ============================================================================
 
-function Main {
-    Write-Banner
+# Stage definitions -- the single source of truth.  Each entry maps a stable
+# stage name (the API contract drivers depend on) to the worker function that
+# implements it.  ``Title`` is what UIs show; ``Category`` lets UIs group
+# stages; ``NeedsUserInput`` tells UIs "this stage prompts -- either skip it
+# or arrange to provide answers another way."
+$InstallStages = @(
+    @{ Name = "uv";               Title = "Installing uv package manager";        Category = "prereqs";      NeedsUserInput = $false; Worker = "Stage-Uv" }
+    @{ Name = "python";           Title = "Verifying Python $PythonVersion";      Category = "prereqs";      NeedsUserInput = $false; Worker = "Stage-Python" }
+    @{ Name = "git";              Title = "Installing Git";                       Category = "prereqs";      NeedsUserInput = $false; Worker = "Stage-Git" }
+    @{ Name = "node";             Title = "Detecting Node.js";                    Category = "prereqs";      NeedsUserInput = $false; Worker = "Stage-Node" }
+    @{ Name = "system-packages";  Title = "Installing ripgrep and ffmpeg";        Category = "prereqs";      NeedsUserInput = $false; Worker = "Stage-SystemPackages" }
+    @{ Name = "repository";       Title = "Cloning Hermes repository";            Category = "install";      NeedsUserInput = $false; Worker = "Stage-Repository" }
+    @{ Name = "venv";             Title = "Creating Python virtual environment";  Category = "install";      NeedsUserInput = $false; Worker = "Stage-Venv" }
+    @{ Name = "dependencies";     Title = "Installing Python dependencies";       Category = "install";      NeedsUserInput = $false; Worker = "Stage-Dependencies" }
+    @{ Name = "node-deps";        Title = "Installing Node.js dependencies";      Category = "install";      NeedsUserInput = $false; Worker = "Stage-NodeDeps" }
+    @{ Name = "path";             Title = "Adding Hermes to PATH";                Category = "finalize";     NeedsUserInput = $false; Worker = "Stage-Path" }
+    @{ Name = "config-templates"; Title = "Writing configuration templates";      Category = "finalize";     NeedsUserInput = $false; Worker = "Stage-ConfigTemplates" }
+    @{ Name = "platform-sdks";    Title = "Installing messaging platform SDKs";   Category = "finalize";     NeedsUserInput = $false; Worker = "Stage-PlatformSdks" }
+    # Interactive stages.  In non-interactive mode these become no-ops; the
+    # caller (GUI / CI) handles the equivalent UX themselves.
+    @{ Name = "configure";        Title = "Configuring API keys and models";      Category = "post-install"; NeedsUserInput = $true;  Worker = "Stage-Configure" }
+    @{ Name = "gateway";          Title = "Starting messaging gateway";           Category = "post-install"; NeedsUserInput = $true;  Worker = "Stage-Gateway" }
+)
 
+# Stage workers -- thin wrappers that delegate to the existing Install-* /
+# Test-* / Invoke-* functions while preserving their error semantics.  Kept
+# as a separate layer so the existing functions remain callable directly
+# (helpful for one-off recovery: ``. install.ps1; Install-Venv``).
+#
+# Stages that depend on uv (anything after Stage-Uv) call Resolve-UvCmd
+# first so they work in cross-process driver mode where $script:UvCmd
+# set by Stage-Uv in a sibling powershell process is not visible here.
+# Resolve-UvCmd is a fast no-op when $script:UvCmd is already populated
+# (the default-invocation case where Main runs everything in one
+# process), and throws cleanly if uv truly isn't installed yet.
+function Stage-Uv               { if (-not (Install-Uv))     { throw "uv installation failed" } }
+function Stage-Python           { Resolve-UvCmd; if (-not (Test-Python))    { throw "Python $PythonVersion not available" } }
+function Stage-Git              { if (-not (Install-Git))    { throw "Git not available and auto-install failed -- install from https://git-scm.com/download/win then re-run" } }
+function Stage-Node             { [void](Test-Node) }
+function Stage-SystemPackages   { Install-SystemPackages }
+function Stage-Repository       { Install-Repository }
+function Stage-Venv             { Resolve-UvCmd; Install-Venv }
+function Stage-Dependencies     { Resolve-UvCmd; Install-Dependencies }
+function Stage-NodeDeps         { Install-NodeDeps }
+function Stage-Path             { Set-PathVariable }
+function Stage-ConfigTemplates  { Copy-ConfigTemplates }
+function Stage-PlatformSdks     { Resolve-UvCmd; Install-PlatformSdks }
+function Stage-Configure        { Invoke-SetupWizard }
+function Stage-Gateway          { Start-GatewayIfConfigured }
+
+function Get-InstallStage {
+    param([string]$Name)
+    foreach ($s in $InstallStages) {
+        if ($s.Name -eq $Name) { return $s }
+    }
+    return $null
+}
+
+function Step-OutOfInstallDir {
     # Windows refuses to delete a directory any shell is currently cd'd
-    # inside — and silently leaves orphan files behind, which then wedge
-    # "is this a valid git repo" probes on re-install.  If the current
-    # working dir is under $InstallDir, step out to the user's home
-    # BEFORE doing anything else.  Harmless when the user ran the
-    # installer from somewhere else.
+    # inside -- and silently leaves orphan files behind, which then wedge
+    # "is this a valid git repo" probes on re-install.  Harmless when the
+    # caller ran the installer from somewhere else.
     try {
         $currentResolved = (Get-Location).ProviderPath
         $installResolved = $null
@@ -1571,36 +1949,130 @@ function Main {
             Set-Location $env:USERPROFILE
         }
     } catch {}
-
-    if (-not (Install-Uv)) { throw "uv installation failed — cannot continue" }
-    if (-not (Test-Python)) { throw "Python $PythonVersion not available — cannot continue" }
-    if (-not (Install-Git)) { throw "Git not available and auto-install failed — install from https://git-scm.com/download/win then re-run" }
-    # Test-Node always returns $true (sets $script:HasNode on success, emits a
-    # warning on failure and continues so non-browser installs still work).
-    # Cast to [void] so the bare return value doesn't print "True" to the
-    # console between the "Node found" line and the next installer step.
-    [void](Test-Node)
-    Install-SystemPackages  # ripgrep + ffmpeg in one step
-
-    Install-Repository
-    Install-Venv
-    Install-Dependencies
-    Install-NodeDeps
-    Set-PathVariable
-    Copy-ConfigTemplates
-    Invoke-SetupWizard
-    Install-PlatformSdks
-    Start-GatewayIfConfigured
-
-    Write-Completion
 }
 
-# Wrap in try/catch so errors don't kill the terminal when run via:
-#   irm https://...install.ps1 | iex
-# (exit/throw inside iex kills the entire PowerShell session)
+function Invoke-Stage {
+    param(
+        [Parameter(Mandatory=$true)] [hashtable]$StageDef
+    )
+
+    # Refresh PATH from registry so this stage sees binaries installed by
+    # prior stages, even when each stage runs in its own powershell process.
+    # No-op in cost-relevant cases (default invocation path syncs once per
+    # foreach pass; cross-process drivers get the necessary freshening).
+    Sync-EnvPath
+
+    $start = [DateTime]::UtcNow
+    $result = @{
+        stage        = $StageDef.Name
+        ok           = $false
+        skipped      = $false
+        reason       = $null
+        duration_ms  = 0
+    }
+
+    try {
+        & $StageDef.Worker
+        $result.ok = $true
+    } catch {
+        $result.ok = $false
+        $result.reason = "$_"
+        throw
+    } finally {
+        $result.duration_ms = [int]([DateTime]::UtcNow - $start).TotalMilliseconds
+        if ($Json -or $Stage) {
+            # In stage-driver mode every stage emits a JSON line so the
+            # caller can stream progress.  In default interactive mode we
+            # stay silent here (the worker already wrote human output).
+            $result | ConvertTo-Json -Compress | Write-Output
+        }
+    }
+}
+
+# ============================================================================
+# Main
+# ============================================================================
+
+function Invoke-AllStages {
+    Step-OutOfInstallDir
+    foreach ($s in $InstallStages) {
+        Invoke-Stage -StageDef $s
+    }
+}
+
+function Main {
+    Write-Banner
+    Invoke-AllStages
+    if (-not $Json) {
+        Write-Completion
+    } else {
+        @{ ok = $true; protocol_version = $InstallStageProtocolVersion } | ConvertTo-Json -Compress | Write-Output
+    }
+}
+
+# ----------------------------------------------------------------------------
+# Entry-point dispatch
+# ----------------------------------------------------------------------------
+#
+# All branches funnel through one try/catch so errors don't kill an `irm |
+# iex` PowerShell session, and so failures in stage-driver mode produce a
+# structured JSON error frame instead of a bare exception.
+
 try {
+    if ($ProtocolVersion) {
+        Write-Output $InstallStageProtocolVersion
+        exit 0
+    }
+
+    if ($Manifest) {
+        $payload = @{
+            protocol_version = $InstallStageProtocolVersion
+            stages = @($InstallStages | ForEach-Object {
+                @{
+                    name             = $_.Name
+                    title            = $_.Title
+                    category         = $_.Category
+                    needs_user_input = $_.NeedsUserInput
+                }
+            })
+        }
+        $payload | ConvertTo-Json -Depth 5 -Compress | Write-Output
+        exit 0
+    }
+
+    if ($Stage) {
+        $def = Get-InstallStage -Name $Stage
+        if (-not $def) {
+            $err = @{
+                ok     = $false
+                stage  = $Stage
+                reason = "unknown stage: $Stage. Run install.ps1 -Manifest to list valid stages."
+            }
+            $err | ConvertTo-Json -Compress | Write-Output
+            exit 2
+        }
+        Step-OutOfInstallDir
+        Invoke-Stage -StageDef $def
+        exit 0
+    }
+
+    # Default: full install (today's behavior, plus optional -NonInteractive
+    # and -Json layered on by the params above).
     Main
 } catch {
+    if ($Json -or $Stage) {
+        # Stage-driver mode: caller wants JSON they can parse.  Emit a
+        # structured error frame and exit non-zero.
+        $err = @{
+            ok     = $false
+            stage  = if ($Stage) { $Stage } else { $null }
+            reason = "$_"
+        }
+        $err | ConvertTo-Json -Compress | Write-Output
+        exit 1
+    }
+
+    # Interactive mode: keep today's friendly recovery hint.
     Write-Host ""
     Write-Err "Installation failed: $_"
     Write-Host ""
diff --git a/scripts/tests/test-install-ps1-stage-protocol.ps1 b/scripts/tests/test-install-ps1-stage-protocol.ps1
new file mode 100644
index 00000000000..4e2e7ea256e
--- /dev/null
+++ b/scripts/tests/test-install-ps1-stage-protocol.ps1
@@ -0,0 +1,134 @@
+# Smoke tests for the install.ps1 stage protocol.
+#
+# Run from a PowerShell prompt:
+#
+#   powershell -NoProfile -ExecutionPolicy Bypass -File scripts/tests/test-install-ps1-stage-protocol.ps1
+#
+# These tests only exercise the metadata surface (-ProtocolVersion, -Manifest,
+# unknown -Stage handling).  They DO NOT actually run any install stages —
+# those have heavy side effects (winget, git clone, pip install, PATH writes)
+# and are out of scope for a unit smoke test.  All three metadata commands
+# below return without invoking Main / Invoke-AllStages.
+#
+# To exercise real install stages, drive the script from a clean VM.
+
+$ErrorActionPreference = "Stop"
+$repoRoot = Split-Path -Parent (Split-Path -Parent (Split-Path -Parent $MyInvocation.MyCommand.Path))
+$installScript = Join-Path $repoRoot "scripts\install.ps1"
+
+if (-not (Test-Path $installScript)) {
+    throw "Could not locate install.ps1 at $installScript"
+}
+
+$failures = 0
+function Assert-Equal {
+    param([Parameter(Mandatory=$true)] $Expected,
+          [Parameter(Mandatory=$true)] $Actual,
+          [Parameter(Mandatory=$true)] [string]$Label)
+    if ($Expected -ne $Actual) {
+        Write-Host "FAIL: $Label" -ForegroundColor Red
+        Write-Host "  expected: $Expected"
+        Write-Host "  actual:   $Actual"
+        $script:failures++
+    } else {
+        Write-Host "OK: $Label" -ForegroundColor Green
+    }
+}
+function Assert-True {
+    param([Parameter(Mandatory=$true)] $Condition,
+          [Parameter(Mandatory=$true)] [string]$Label)
+    if (-not $Condition) {
+        Write-Host "FAIL: $Label" -ForegroundColor Red
+        $script:failures++
+    } else {
+        Write-Host "OK: $Label" -ForegroundColor Green
+    }
+}
+
+# -----------------------------------------------------------------------------
+# Test: -ProtocolVersion emits a single integer
+# -----------------------------------------------------------------------------
+Write-Host ""
+Write-Host "-- -ProtocolVersion --"
+$output = & powershell -NoProfile -ExecutionPolicy Bypass -File $installScript -ProtocolVersion
+Assert-Equal -Expected 0 -Actual $LASTEXITCODE -Label "-ProtocolVersion exits 0"
+Assert-True ($output -match '^\d+$') -Label "-ProtocolVersion emits an integer (got: $output)"
+
+# -----------------------------------------------------------------------------
+# Test: -Manifest emits valid JSON with expected shape
+# -----------------------------------------------------------------------------
+Write-Host ""
+Write-Host "-- -Manifest --"
+$manifestJson = & powershell -NoProfile -ExecutionPolicy Bypass -File $installScript -Manifest
+Assert-Equal -Expected 0 -Actual $LASTEXITCODE -Label "-Manifest exits 0"
+
+$manifest = $null
+try {
+    $manifest = $manifestJson | ConvertFrom-Json
+    Assert-True $true -Label "-Manifest output parses as JSON"
+} catch {
+    Assert-True $false -Label "-Manifest output parses as JSON (parse error: $_)"
+}
+
+if ($manifest) {
+    Assert-True ($manifest.protocol_version -is [int] -or $manifest.protocol_version -is [long]) `
+        -Label "manifest.protocol_version is an integer"
+    Assert-True ($manifest.stages.Count -gt 0) -Label "manifest.stages is non-empty"
+
+    # Every stage has the four required fields
+    $allValid = $true
+    foreach ($stage in $manifest.stages) {
+        foreach ($field in @("name", "title", "category", "needs_user_input")) {
+            if (-not ($stage.PSObject.Properties.Name -contains $field)) {
+                Write-Host "  stage missing field '$field': $($stage | ConvertTo-Json -Compress)" -ForegroundColor Red
+                $allValid = $false
+            }
+        }
+    }
+    Assert-True $allValid -Label "every stage has name/title/category/needs_user_input"
+
+    # Specific stage names that the GUI driver will rely on
+    $names = $manifest.stages | ForEach-Object { $_.name }
+    foreach ($expected in @("uv", "python", "git", "venv", "dependencies", "configure", "gateway")) {
+        Assert-True ($names -contains $expected) -Label "manifest contains stage '$expected'"
+    }
+
+    # The two known-interactive stages must declare needs_user_input
+    $interactive = $manifest.stages | Where-Object { $_.needs_user_input } | ForEach-Object { $_.name }
+    Assert-True ($interactive -contains "configure") -Label "'configure' stage flagged needs_user_input"
+    Assert-True ($interactive -contains "gateway") -Label "'gateway' stage flagged needs_user_input"
+}
+
+# -----------------------------------------------------------------------------
+# Test: unknown stage name -> exit 2, structured JSON error
+# -----------------------------------------------------------------------------
+Write-Host ""
+Write-Host "-- -Stage with unknown name --"
+$errOutput = & powershell -NoProfile -ExecutionPolicy Bypass -File $installScript -Stage "does-not-exist"
+Assert-Equal -Expected 2 -Actual $LASTEXITCODE -Label "unknown -Stage exits 2"
+
+$errFrame = $null
+try {
+    $errFrame = $errOutput | ConvertFrom-Json
+    Assert-True $true -Label "unknown-stage output parses as JSON"
+} catch {
+    Assert-True $false -Label "unknown-stage output parses as JSON (parse error: $_)"
+}
+
+if ($errFrame) {
+    Assert-Equal -Expected $false -Actual $errFrame.ok -Label "unknown-stage frame has ok=false"
+    Assert-Equal -Expected "does-not-exist" -Actual $errFrame.stage -Label "unknown-stage frame echoes stage name"
+    Assert-True ($errFrame.reason -match "unknown stage") -Label "unknown-stage frame explains why"
+}
+
+# -----------------------------------------------------------------------------
+# Summary
+# -----------------------------------------------------------------------------
+Write-Host ""
+if ($failures -gt 0) {
+    Write-Host "FAILED: $failures assertion(s) failed" -ForegroundColor Red
+    exit 1
+} else {
+    Write-Host "All smoke tests passed." -ForegroundColor Green
+    exit 0
+}

From c0b64f087750ea4a9fe11e32ad9cce21e9857e2d Mon Sep 17 00:00:00 2001
From: emozilla <emozilla@nousresearch.com>
Date: Sun, 17 May 2026 01:23:59 -0400
Subject: [PATCH 195/218] fix(install.ps1): address Copilot review on #27224

Three issues flagged by the Copilot review on this PR:

1. Double JSON emit on stage failure (Copilot #1, #2). When -Stage <name>
   ran a worker that threw, Invoke-Stage's finally emitted a JSON result
   frame AND the entry-point catch emitted a second error frame --
   producing two concatenated JSON objects on stdout and breaking the
   one-line-per-invocation contract that drivers parse against. Same
   issue applied to -Json mode on a full install (every stage's finally
   plus a final error frame missing duration_ms/skipped).

   Fix: Invoke-Stage's finally now sets $script:_StageEmittedErrorFrame
   when it emits a failure frame; the entry-point catch checks the flag
   and skips its own emit, still exit 1.

2. $prevEAP uninitialized on early try-block throw (Copilot #3). In
   Install-Uv, Test-Python, Test-Node's winget fallback,
   _Run-NpmInstall, and the playwright block, '$prevEAP =
   $ErrorActionPreference' lived as the first statement INSIDE the
   try. If anything between 'try {' and that line threw (Write-Info on
   an unusual host, the npx-finding loop, etc.), the catch's
   'if ($prevEAP) { ... }' restore was a no-op and EAP could remain
   relaxed.

   Fix: hoist '$prevEAP = $ErrorActionPreference' to the line
   immediately before 'try {' in all five sites. Catch's restore is
   now always meaningful regardless of where in the try the throw
   originated.

No change to Invoke-Stage's success path or to the four lint-clean EAP
sites (Test-Node was the only winget-related catch). All 19 metadata
smoke tests still pass.
---
 scripts/install.ps1 | 49 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 4d7545ca689..f2914575e84 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -139,6 +139,11 @@ function Install-Uv {
     
     # Install uv
     Write-Info "Installing uv (fast Python package manager)..."
+    # Capture EAP outside the try block so the catch's restore call always
+    # has a meaningful value -- if the assignment lived inside try and the
+    # try body threw before reaching it, the catch would see $prevEAP
+    # unset and leave EAP at whatever the previous protected call set.
+    $prevEAP = $ErrorActionPreference
     try {
         # Relax ErrorActionPreference around the nested astral installer.
         # The astral installer (a separate `powershell -c "irm ... | iex"`)
@@ -151,7 +156,6 @@ function Install-Uv {
         # pattern Test-Python uses for `uv python install`; verify success
         # via Test-Path on the expected binary afterwards, which is more
         # reliable than exit-code/stderr signal anyway.
-        $prevEAP = $ErrorActionPreference
         $ErrorActionPreference = "Continue"
         powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 2>&1 | Out-Null
         $ErrorActionPreference = $prevEAP
@@ -267,6 +271,9 @@ function Test-Python {
     
     # Python not found -- use uv to install it (no admin needed!)
     Write-Info "Python $PythonVersion not found, installing via uv..."
+    # Capture EAP outside the try block so the catch's restore call always
+    # has a meaningful value (see Install-Uv for the full rationale).
+    $prevEAP = $ErrorActionPreference
     try {
         # Temporarily relax ErrorActionPreference: uv writes download progress
         # ("Downloading cpython-3.11.15-windows-x86_64-none (24.5MiB)") to
@@ -278,7 +285,6 @@ function Test-Python {
         # afterwards, which is the reliable signal regardless of exit-code
         # semantics or stderr noise.  This fix was previously landed as
         # commit ec1714e71 and then lost in a release squash; reapplied here.
-        $prevEAP = $ErrorActionPreference
         $ErrorActionPreference = "Continue"
         $uvOutput = & $UvCmd python install $PythonVersion 2>&1
         $uvExitCode = $LASTEXITCODE
@@ -656,12 +662,14 @@ function Test-Node {
     # locked firewall, etc.) but the user is willing to consent to UAC.
     if (Get-Command winget -ErrorAction SilentlyContinue) {
         Write-Info "Falling back to winget (may prompt UAC -- check your taskbar for a flashing icon)..."
+        # Capture EAP outside the try block so the catch's restore call always
+        # has a meaningful value (see Install-Uv for the full rationale).
+        $prevEAP = $ErrorActionPreference
         try {
             # Relax EAP=Stop so stderr lines from winget don't get wrapped
             # as ErrorRecords and short-circuit the 2>&1 pipe before we can
             # check the post-condition.  See the long comment in Install-Uv
             # for the same pattern.
-            $prevEAP = $ErrorActionPreference
             $ErrorActionPreference = "Continue"
             winget install OpenJS.NodeJS.LTS --silent --accept-package-agreements --accept-source-agreements 2>&1 | Out-Null
             $ErrorActionPreference = $prevEAP
@@ -1367,6 +1375,9 @@ function Install-NodeDeps {
     # it works uniformly for npm.cmd, npx.cmd, and bare .exe files.
     function _Run-NpmInstall([string]$label, [string]$installDir, [string]$logPath, [string]$npmPath) {
         Push-Location $installDir
+        # Capture EAP outside the try block so the catch's restore call always
+        # has a meaningful value (see Install-Uv for the full rationale).
+        $prevEAP = $ErrorActionPreference
         try {
             # Stream npm's output to BOTH the console and the log file via
             # Tee-Object.  Previously this called ``& npm install --silent
@@ -1391,7 +1402,6 @@ function Install-NodeDeps {
             # is the same issue Test-Python and Install-Uv work around
             # for uv's stderr-emitting installer.  Check success via
             # $LASTEXITCODE, which is reliable regardless of stderr noise.
-            $prevEAP = $ErrorActionPreference
             $ErrorActionPreference = "Continue"
             & $npmPath install --silent 2>&1 | ForEach-Object { "$_" } | Tee-Object -FilePath $logPath
             $code = $LASTEXITCODE
@@ -1456,6 +1466,10 @@ function Install-NodeDeps {
             } else {
                 $pwLog = "$env:TEMP\hermes-playwright-install-$(Get-Random).log"
                 Push-Location $InstallDir
+                # Capture EAP outside the try block so the catch's restore call
+                # always has a meaningful value (see Install-Uv for the full
+                # rationale).
+                $prevEAP = $ErrorActionPreference
                 try {
                     # Playwright Chromium is ~170MB compressed and the
                     # download regularly takes 3-10 minutes on a fresh
@@ -1489,7 +1503,6 @@ function Install-NodeDeps {
                     # each pipeline item to a string strips that wrapper so
                     # the user sees clean playwright output instead of the
                     # alarming-looking error formatting.
-                    $prevEAP = $ErrorActionPreference
                     $ErrorActionPreference = "Continue"
                     & $npxExe --yes playwright install chromium 2>&1 | ForEach-Object { "$_" } | Tee-Object -FilePath $pwLog
                     $pwCode = $LASTEXITCODE
@@ -1985,6 +1998,13 @@ function Invoke-Stage {
             # caller can stream progress.  In default interactive mode we
             # stay silent here (the worker already wrote human output).
             $result | ConvertTo-Json -Compress | Write-Output
+            # Tell the entry-point catch that we've already emitted a
+            # frame for this failure (when $result.ok = $false), so it
+            # doesn't double-emit a second JSON object and break the
+            # one-line-per-stage contract the driver protocol promises.
+            if (-not $result.ok) {
+                $script:_StageEmittedErrorFrame = $true
+            }
         }
     }
 }
@@ -2062,13 +2082,20 @@ try {
 } catch {
     if ($Json -or $Stage) {
         # Stage-driver mode: caller wants JSON they can parse.  Emit a
-        # structured error frame and exit non-zero.
-        $err = @{
-            ok     = $false
-            stage  = if ($Stage) { $Stage } else { $null }
-            reason = "$_"
+        # structured error frame and exit non-zero -- BUT only if
+        # Invoke-Stage didn't already emit one for this same failure.
+        # The inner finally emits the authoritative per-stage frame
+        # (with duration_ms + skipped fields); a second emit here
+        # would produce two concatenated JSON objects on stdout and
+        # break drivers that parse one-line-per-invocation.
+        if (-not $script:_StageEmittedErrorFrame) {
+            $err = @{
+                ok     = $false
+                stage  = if ($Stage) { $Stage } else { $null }
+                reason = "$_"
+            }
+            $err | ConvertTo-Json -Compress | Write-Output
         }
-        $err | ConvertTo-Json -Compress | Write-Output
         exit 1
     }
 

From 3925be2791038e29fc9d1fc10c3fd403a8d5bed7 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 22:52:28 -0700
Subject: [PATCH 196/218] fix(install.ps1): trim completion banner + strip
 em-dash in test

Address the two cosmetic items from review:

- Completion banner middle line was 62 chars vs 59-char top/bottom borders
  (replacing the 1-char checkmark with [OK] added width that wasn't
  reflected in the trailing whitespace).  Drop 3 trailing spaces.
- Smoke test file had a single em-dash in a comment -- the only
  non-ASCII byte across both files.  Replace with -- for consistency
  with install.ps1's pure-ASCII goal.
---
 scripts/install.ps1                               | 2 +-
 scripts/tests/test-install-ps1-stage-protocol.ps1 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index f2914575e84..b23ac54f736 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -1766,7 +1766,7 @@ function Start-GatewayIfConfigured {
 function Write-Completion {
     Write-Host ""
     Write-Host "+---------------------------------------------------------+" -ForegroundColor Green
-    Write-Host "|              [OK] Installation Complete!                   |" -ForegroundColor Green
+    Write-Host "|              [OK] Installation Complete!                |" -ForegroundColor Green
     Write-Host "+---------------------------------------------------------+" -ForegroundColor Green
     Write-Host ""
     
diff --git a/scripts/tests/test-install-ps1-stage-protocol.ps1 b/scripts/tests/test-install-ps1-stage-protocol.ps1
index 4e2e7ea256e..b8fa5271ce6 100644
--- a/scripts/tests/test-install-ps1-stage-protocol.ps1
+++ b/scripts/tests/test-install-ps1-stage-protocol.ps1
@@ -5,7 +5,7 @@
 #   powershell -NoProfile -ExecutionPolicy Bypass -File scripts/tests/test-install-ps1-stage-protocol.ps1
 #
 # These tests only exercise the metadata surface (-ProtocolVersion, -Manifest,
-# unknown -Stage handling).  They DO NOT actually run any install stages —
+# unknown -Stage handling).  They DO NOT actually run any install stages --
 # those have heavy side effects (winget, git clone, pip install, PATH writes)
 # and are out of scope for a unit smoke test.  All three metadata commands
 # below return without invoking Main / Invoke-AllStages.

From fb138d91ca34c3e2e49ce67f3187da6feeedbbdd Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 22:54:56 -0700
Subject: [PATCH 197/218] fix(install.ps1): Stage-Node honest reporting +
 reject empty -Stage

Two protocol-correctness gaps from review:

1. Stage-Node used [void](Test-Node) which discarded Test-Node's return
   value, so the JSON frame always reported ok=true even when Node
   install fully failed.  A GUI driver consuming the manifest couldn't
   tell 'node ready' from 'node missing'.  Wire a soft-skip channel
   ($script:_StageSkippedReason) that workers can populate to surface
   'ran, but the thing it was supposed to set up is not available' as
   skipped=true with a reason in the JSON, without aborting the install
   (Node is optional -- browser tools degrade gracefully, matches
   Write-Completion's existing 'Note: Node.js could not be installed'
   behavior).  Reset before each stage so a prior reason can't leak.

2. The -Stage dispatch used 'if ($Stage)' which is falsy for empty
   string, so 'install.ps1 -Stage ""' fell through to Main and silently
   kicked off a full destructive install.  Switch to
   PSBoundParameters.ContainsKey('Stage') so an explicit empty value
   surfaces as unknown-stage exit 2 with a structured JSON frame, the
   way every other bad stage name does.
---
 scripts/install.ps1 | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index b23ac54f736..c774e9a860c 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -1926,7 +1926,17 @@ $InstallStages = @(
 function Stage-Uv               { if (-not (Install-Uv))     { throw "uv installation failed" } }
 function Stage-Python           { Resolve-UvCmd; if (-not (Test-Python))    { throw "Python $PythonVersion not available" } }
 function Stage-Git              { if (-not (Install-Git))    { throw "Git not available and auto-install failed -- install from https://git-scm.com/download/win then re-run" } }
-function Stage-Node             { [void](Test-Node) }
+# Node is optional (browser tools degrade gracefully without it).  Surface
+# failure to the JSON contract as skipped=true / reason rather than ok=true,
+# so a GUI driver consuming the manifest can distinguish "node ready" from
+# "node missing".  Install flow continues either way -- matches the
+# existing Write-Completion behavior that prints a "Note: Node.js could
+# not be installed" hint instead of aborting.
+function Stage-Node             {
+    if (-not (Test-Node)) {
+        $script:_StageSkippedReason = "Node.js not available; browser tools will be unavailable until node is installed manually from https://nodejs.org/en/download/"
+    }
+}
 function Stage-SystemPackages   { Install-SystemPackages }
 function Stage-Repository       { Install-Repository }
 function Stage-Venv             { Resolve-UvCmd; Install-Venv }
@@ -1975,6 +1985,15 @@ function Invoke-Stage {
     # foreach pass; cross-process drivers get the necessary freshening).
     Sync-EnvPath
 
+    # Per-stage soft-skip channel.  A worker can populate
+    # $script:_StageSkippedReason to surface "ran, but the thing it was
+    # supposed to set up is not available" as skipped=true in the JSON
+    # frame, without throwing.  Used by Stage-Node so the install flow
+    # doesn't abort when an optional capability is missing while still
+    # being honest in the protocol contract.  Reset before each stage so
+    # a prior stage's reason can never leak into a later stage's frame.
+    $script:_StageSkippedReason = $null
+
     $start = [DateTime]::UtcNow
     $result = @{
         stage        = $StageDef.Name
@@ -1987,6 +2006,10 @@ function Invoke-Stage {
     try {
         & $StageDef.Worker
         $result.ok = $true
+        if ($script:_StageSkippedReason) {
+            $result.skipped = $true
+            $result.reason  = $script:_StageSkippedReason
+        }
     } catch {
         $result.ok = $false
         $result.reason = "$_"
@@ -2060,7 +2083,12 @@ try {
         exit 0
     }
 
-    if ($Stage) {
+    # Use PSBoundParameters rather than $Stage truthiness so that an
+    # explicit `-Stage ""` from a misbehaving driver doesn't fall through
+    # to the full-install Main path and silently kick off a destructive
+    # operation.  Empty string is a contract violation; surface it as
+    # unknown-stage exit 2 with a structured JSON frame.
+    if ($PSBoundParameters.ContainsKey("Stage")) {
         $def = Get-InstallStage -Name $Stage
         if (-not $def) {
             $err = @{

From 8d4766afcae676efba0269787ddad7c769ba6c24 Mon Sep 17 00:00:00 2001
From: Sylw3ster <sylw3st3rr@gmail.com>
Date: Sat, 16 May 2026 02:08:40 +0300
Subject: [PATCH 198/218] fix(api_server): coerce stringified booleans in
 request payloads

---
 gateway/platforms/api_server.py       | 40 ++++++++++--
 tests/gateway/test_api_server.py      | 87 +++++++++++++++++++++++++++
 tests/gateway/test_api_server_runs.py | 22 +++++++
 3 files changed, 145 insertions(+), 4 deletions(-)

diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index 809d6cd8a03..ebd4f014690 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -71,6 +71,35 @@ def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int:
         return default
 
 
+_TRUE_REQUEST_BOOL_STRINGS = frozenset({"1", "true", "yes", "on"})
+_FALSE_REQUEST_BOOL_STRINGS = frozenset({"0", "false", "no", "off"})
+
+
+def _coerce_request_bool(value: Any, default: bool = False) -> bool:
+    """Normalize boolean-like API payload values.
+
+    External clients should send real JSON booleans, but some OpenAI-compatible
+    frontends and middleware serialize flags like ``stream`` as strings.  Using
+    Python truthiness on those values misroutes requests because ``"false"`` is
+    still truthy.  Treat only explicit bool-ish scalars as booleans; everything
+    else falls back to the caller's default.
+    """
+    if isinstance(value, bool):
+        return value
+    if value is None:
+        return default
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in _TRUE_REQUEST_BOOL_STRINGS:
+            return True
+        if normalized in _FALSE_REQUEST_BOOL_STRINGS:
+            return False
+        return default
+    if isinstance(value, (int, float)):
+        return bool(value)
+    return default
+
+
 def _normalize_chat_content(
     content: Any, *, _max_depth: int = 10, _depth: int = 0,
 ) -> str:
@@ -1005,7 +1034,7 @@ class APIServerAdapter(BasePlatformAdapter):
                 status=400,
             )
 
-        stream = body.get("stream", False)
+        stream = _coerce_request_bool(body.get("stream"), default=False)
 
         # Extract system message (becomes ephemeral system prompt layered ON TOP of core)
         system_prompt = None
@@ -2082,7 +2111,7 @@ class APIServerAdapter(BasePlatformAdapter):
         instructions = body.get("instructions")
         previous_response_id = body.get("previous_response_id")
         conversation = body.get("conversation")
-        store = body.get("store", True)
+        store = _coerce_request_bool(body.get("store"), default=True)
 
         # conversation and previous_response_id are mutually exclusive
         if conversation and previous_response_id:
@@ -2165,7 +2194,7 @@ class APIServerAdapter(BasePlatformAdapter):
         # groups the entire conversation under one session entry.
         session_id = stored_session_id or str(uuid.uuid4())
 
-        stream = bool(body.get("stream", False))
+        stream = _coerce_request_bool(body.get("stream"), default=False)
         if stream:
             # Streaming branch — emit OpenAI Responses SSE events as the
             # agent runs so frontends can render text deltas and tool
@@ -3228,7 +3257,10 @@ class APIServerAdapter(BasePlatformAdapter):
                 status=409,
             )
 
-        resolve_all = bool(body.get("all") or body.get("resolve_all"))
+        resolve_all = (
+            _coerce_request_bool(body.get("all"), default=False)
+            or _coerce_request_bool(body.get("resolve_all"), default=False)
+        )
         try:
             from tools.approval import resolve_gateway_approval
 
diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py
index 032af7109a5..7d08d64bb32 100644
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -704,6 +704,37 @@ class TestChatCompletionsEndpoint:
                 assert "[DONE]" in body
                 assert "Hello!" in body
 
+    @pytest.mark.asyncio
+    async def test_stream_string_false_returns_json_completion(self, adapter):
+        """Quoted false must not route chat completions into SSE mode."""
+        mock_result = {
+            "final_response": "Hello! How can I help you today?",
+            "messages": [],
+            "api_calls": 1,
+        }
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (
+                    mock_result,
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    json={
+                        "model": "hermes-agent",
+                        "messages": [{"role": "user", "content": "Hello"}],
+                        "stream": "false",
+                    },
+                )
+
+            assert resp.status == 200
+            assert "text/event-stream" not in resp.headers.get("Content-Type", "")
+            data = await resp.json()
+            assert data["object"] == "chat.completion"
+            assert data["choices"][0]["message"]["content"] == mock_result["final_response"]
+
     @pytest.mark.asyncio
     async def test_stream_task_done_callback_enqueues_eos_for_chat_completions(self, adapter):
         """Regression guard for #24451: completion callback must signal SSE EOS."""
@@ -1655,6 +1686,31 @@ class TestResponsesEndpoint:
             # The response has an ID but it shouldn't be retrievable
             assert adapter._response_store.get(data["id"]) is None
 
+    @pytest.mark.asyncio
+    async def test_store_string_false_does_not_store(self, adapter):
+        """Quoted false must preserve ephemeral store=false semantics."""
+        mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (
+                    mock_result,
+                    {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
+                )
+                resp = await cli.post(
+                    "/v1/responses",
+                    json={
+                        "model": "hermes-agent",
+                        "input": "Hello",
+                        "store": "false",
+                    },
+                )
+
+            assert resp.status == 200
+            data = await resp.json()
+            assert adapter._response_store.get(data["id"]) is None
+
     @pytest.mark.asyncio
     async def test_instructions_inherited_from_previous(self, adapter):
         """If no instructions provided, carry forward from previous response."""
@@ -1749,6 +1805,37 @@ class TestResponsesStreaming:
                 assert "Hello" in body
                 assert " world" in body
 
+    @pytest.mark.asyncio
+    async def test_stream_string_false_returns_json_response(self, adapter):
+        """Quoted false must not route Responses API requests into SSE mode."""
+        mock_result = {
+            "final_response": "Paris is the capital of France.",
+            "messages": [],
+            "api_calls": 1,
+        }
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (
+                    mock_result,
+                    {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
+                )
+                resp = await cli.post(
+                    "/v1/responses",
+                    json={
+                        "model": "hermes-agent",
+                        "input": "What is the capital of France?",
+                        "stream": "false",
+                    },
+                )
+
+            assert resp.status == 200
+            assert "text/event-stream" not in resp.headers.get("Content-Type", "")
+            data = await resp.json()
+            assert data["object"] == "response"
+            assert data["output"][0]["content"][0]["text"] == mock_result["final_response"]
+
     @pytest.mark.asyncio
     async def test_stream_task_done_callback_enqueues_eos_for_responses(self, adapter):
         """Regression guard for #24451 on /v1/responses streaming path."""
diff --git a/tests/gateway/test_api_server_runs.py b/tests/gateway/test_api_server_runs.py
index bdb00d74a7b..8e7169a658d 100644
--- a/tests/gateway/test_api_server_runs.py
+++ b/tests/gateway/test_api_server_runs.py
@@ -335,6 +335,28 @@ class TestRunEvents:
                     "approval_not_pending",
                 }
 
+    @pytest.mark.asyncio
+    async def test_approval_string_false_does_not_resolve_all(self, adapter):
+        """Quoted false must not fan out approval resolution across the queue."""
+        app = _create_runs_app(adapter)
+        run_id = "run_bool_parse"
+        adapter._run_statuses[run_id] = {"run_id": run_id, "status": "running"}
+        adapter._run_approval_sessions[run_id] = "session-123"
+
+        async with TestClient(TestServer(app)) as cli:
+            with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve:
+                approval_resp = await cli.post(
+                    f"/v1/runs/{run_id}/approval",
+                    json={"choice": "once", "all": "false"},
+                )
+
+        assert approval_resp.status == 200
+        mock_resolve.assert_called_once_with(
+            "session-123",
+            "once",
+            resolve_all=False,
+        )
+
     @pytest.mark.asyncio
     async def test_events_not_found_returns_404(self, adapter):
         app = _create_runs_app(adapter)

From bde3c7982c30796f8709cb0041d34ab36a4d7a9c Mon Sep 17 00:00:00 2001
From: darvsum <darvsum@users.noreply.github.com>
Date: Sat, 16 May 2026 13:18:01 +0800
Subject: [PATCH 199/218] fix: preserve discover_models in
 _normalize_custom_provider_entry

The _normalize_custom_provider_entry() function was dropping the
discover_models field from custom_provider entries because:

1. It was not listed in _KNOWN_KEYS, so it was logged as an
   unknown key and ignored.
2. The function builds the normalized dict by explicitly copying
   known fields, so even if the warning was suppressed, the value
   was not carried through.

This caused downstream model_switch.py to default discover_models
to True, triggering /models HTTP probes on unreachable endpoints.
With 4 unreachable internal endpoints at ~6s timeout each, the
/api/model/options endpoint took ~24s instead of <1s.
---
 hermes_cli/config.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c41158e42ae..e4447183746 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -2914,6 +2914,7 @@ def _normalize_custom_provider_entry(
         "api_mode", "transport", "model", "default_model", "models",
         "context_length", "rate_limit_delay",
         "request_timeout_seconds", "stale_timeout_seconds",
+        "discover_models",
     }
     for camel, snake in _CAMEL_ALIASES.items():
         if camel in entry and snake not in entry:
@@ -3004,6 +3005,10 @@ def _normalize_custom_provider_entry(
     if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0:
         normalized["rate_limit_delay"] = rate_limit_delay
 
+    discover_models = entry.get("discover_models")
+    if isinstance(discover_models, bool):
+        normalized["discover_models"] = discover_models
+
     return normalized
 
 
From 75e5d0f6bd412ff4ae719a6ebd98bfd5a471f66c Mon Sep 17 00:00:00 2001
From: hueilau <33933019+hueilau@users.noreply.github.com>
Date: Sat, 16 May 2026 23:02:46 -0700
Subject: [PATCH 200/218] fix: strip image parts for non-vision models with
 provider profiles

_propare_messages_for_non_vision_model() was only called in the legacy
flag path (no provider profile). Providers with registered profiles
(e.g. DeepSeek, Kimi) bypassed the strip, causing HTTP 400 errors when
image_url content blocks reached their non-vision APIs.

This mirrors the existing behavior in the legacy path, ensuring all
non-vision models get image stripping regardless of profile status.
Vision-capable models are unaffected (the function is a no-op for them).
---
 run_agent.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index b239f2aeb60..5e0a9ec06ac 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -10033,6 +10033,11 @@ class AIAgent:
             if _ephemeral_out is not None:
                 self._ephemeral_max_output_tokens = None
 
+            # Strip image parts for non-vision models that have provider profiles
+            # (e.g. DeepSeek, Kimi). The legacy path below already does this, but
+            # registered providers with profiles were bypassing the strip.
+            api_messages = self._prepare_messages_for_non_vision_model(api_messages)
+
             return _ct.build_kwargs(
                 model=self.model,
                 messages=api_messages,

From 5338250dab14b3e4f9dfb306446e8c55835adfad Mon Sep 17 00:00:00 2001
From: Timur00Kh <32297275+Timur00Kh@users.noreply.github.com>
Date: Sun, 17 May 2026 00:28:24 +0400
Subject: [PATCH 201/218] fix(gateway): add direct_messages_topic_id for
 synthetic Telegram DM events

When /goal loop generates synthetic MessageEvents (goal continuations,
status notices), the reply anchor is unavailable (message_id=None). For
Telegram DM topic lanes, the Telegram adapter requires
direct_messages_topic_id to route messages correctly; without it, the
adapter falls back to message_thread_id=None, sending messages to the
root 'All Messages' thread instead of the active topic lane.

The fix includes direct_messages_topic_id in thread metadata for all
non-General Telegram DM topics, ensuring queued/synthetic messages are
delivered to the correct thread even when no reply anchor exists.
---
 gateway/run.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/gateway/run.py b/gateway/run.py
index 458603c3115..56185190e26 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -12546,6 +12546,12 @@ class GatewayRunner:
             and getattr(source, "chat_type", None) == "dm"
         ):
             metadata["telegram_dm_topic_reply_fallback"] = True
+            # Telegram DM topic lanes need direct_messages_topic_id in metadata
+            # so synthetic/queued messages (goal continuations, status notices)
+            # route to the correct topic even when reply anchor is unavailable.
+            tid = str(thread_id)
+            if tid and tid not in {"", "1"}:
+                metadata["direct_messages_topic_id"] = tid
             anchor = reply_to_message_id or getattr(source, "message_id", None)
             if anchor is not None:
                 metadata["telegram_reply_to_message_id"] = str(anchor)

From 8bf09455dc498581fe6dea21402ee2a9238a2212 Mon Sep 17 00:00:00 2001
From: Grogger <al.bellemare@gmail.com>
Date: Sat, 16 May 2026 12:06:09 -0400
Subject: [PATCH 202/218] fix(windows): suppress console window flash on
 subprocess spawns

Add creationflags=CREATE_NO_WINDOW to every Windows Popen call
across the terminal, process registry, code execution, and kanban
worker subsystems. Prevents visible CMD windows from flashing on
the user's desktop during agent operation.

Also adds the _IS_WINDOWS module constant to kanban_db.py where
it was missing, for consistency with the other patched files.

5 Popen sites across 4 files:
- tools/environments/local.py (terminal foreground spawn)
- tools/process_registry.py (background process spawn)
- tools/code_execution_tool.py (sandbox + interpreter probe)
- hermes_cli/kanban_db.py (kanban worker spawn)
---
 hermes_cli/kanban_db.py      | 2 ++
 tools/code_execution_tool.py | 2 ++
 tools/environments/local.py  | 1 +
 tools/process_registry.py    | 1 +
 4 files changed, 6 insertions(+)

diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py
index 0db694ff5b1..9d5ddad6ed0 100644
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@@ -93,6 +93,7 @@ from toolsets import get_toolset_names
 VALID_STATUSES = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
 VALID_WORKSPACE_KINDS = {"scratch", "worktree", "dir"}
 KNOWN_TOOLSET_NAMES = frozenset(name.casefold() for name in get_toolset_names())
+_IS_WINDOWS = sys.platform == "win32"
 
 # A running task's claim is valid for 15 minutes; after that the next
 # dispatcher tick reclaims it.  Workers that outlive this window should call
@@ -4024,6 +4025,7 @@ def _default_spawn(
             stderr=subprocess.STDOUT,
             env=env,
             start_new_session=True,
+            creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
         )
     except FileNotFoundError:
         log_f.close()
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 3822ce539f2..bdbc4bfbe1b 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -1238,6 +1238,7 @@ def execute_code(
             stderr=subprocess.PIPE,
             stdin=subprocess.DEVNULL,
             preexec_fn=None if _IS_WINDOWS else os.setsid,
+            creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
         )
 
         # --- Poll loop: watch for exit, timeout, and interrupt ---
@@ -1568,6 +1569,7 @@ def _is_usable_python(python_path: str) -> bool:
              "import sys; sys.exit(0 if sys.version_info >= (3, 8) else 1)"],
             timeout=5,
             capture_output=True,
+            creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
         )
         return result.returncode == 0
     except (OSError, subprocess.TimeoutExpired, subprocess.SubprocessError):
diff --git a/tools/environments/local.py b/tools/environments/local.py
index 3b9d65449fa..177e5efab15 100644
--- a/tools/environments/local.py
+++ b/tools/environments/local.py
@@ -513,6 +513,7 @@ class LocalEnvironment(BaseEnvironment):
             stderr=subprocess.STDOUT,
             stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
             preexec_fn=None if _IS_WINDOWS else os.setsid,
+            creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
             cwd=_popen_cwd,
         )
         if not _IS_WINDOWS:
diff --git a/tools/process_registry.py b/tools/process_registry.py
index 184939adf75..8429a71e087 100644
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@@ -557,6 +557,7 @@ class ProcessRegistry:
             stderr=subprocess.STDOUT,
             stdin=subprocess.PIPE,
             preexec_fn=None if _IS_WINDOWS else os.setsid,
+            creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
         )
 
         session.process = proc

From 6f50c26b2a0254275e8f79a30e8c950cece81ed5 Mon Sep 17 00:00:00 2001
From: lemassykoi <16377344+lemassykoi@users.noreply.github.com>
Date: Sat, 16 May 2026 23:02:46 -0700
Subject: [PATCH 203/218] fix(model-switch): probe /models for custom providers
 without api_key

The Telegram/Discord model picker skipped live model discovery for
custom providers (llama.cpp, Ollama) unless an api_key was configured.
Local providers typically don't require auth on the /models endpoint.

The CLI always probes /models, so this brings the gateway picker into
parity.

Change: `if api_url and api_key:` -> `if api_url:`
---
 hermes_cli/model_switch.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py
index fec1f33d092..a5d299165fc 100644
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@@ -1688,7 +1688,11 @@ def list_authenticated_providers(
                 continue
             # Live model discovery from custom provider endpoints (matches
             # Section 3 behavior for user ``providers:`` entries).
-            if api_url and api_key:
+            # Also probes when no api_key is set (e.g. local llama.cpp /
+            # Ollama servers) — the /models endpoint often works without
+            # auth.  The CLI's _model_flow_named_custom always probes, so
+            # the Telegram/Discord picker should do the same for parity.
+            if api_url:
                 try:
                     from hermes_cli.models import fetch_api_models
 

From 6158964ff69c0c3ec0ee37fd5de5221b65ac7bcf Mon Sep 17 00:00:00 2001
From: draplater <6349758+draplater@users.noreply.github.com>
Date: Sat, 16 May 2026 23:02:46 -0700
Subject: [PATCH 204/218] feat: inject current time into goal judge prompt

The goal judge only receives the goal text and the agent's last
response. It has no concept of the current time, making it
impossible to evaluate time-sensitive goals like 'keep working
until 5pm'.

This commit adds 'Current time' to both JUDGE_USER_PROMPT_TEMPLATE
and JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE, computed from
datetime.now().astimezone() at judge call time.
---
 hermes_cli/goals.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py
index 62ee00547c1..d6a139419a7 100644
--- a/hermes_cli/goals.py
+++ b/hermes_cli/goals.py
@@ -34,6 +34,7 @@ import logging
 import re
 import time
 from dataclasses import dataclass, field, asdict
+from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple
 
 logger = logging.getLogger(__name__)
@@ -110,6 +111,7 @@ JUDGE_SYSTEM_PROMPT = (
 JUDGE_USER_PROMPT_TEMPLATE = (
     "Goal:\n{goal}\n\n"
     "Agent's most recent response:\n{response}\n\n"
+    "Current time: {current_time}\n\n"
     "Is the goal satisfied?"
 )
 
@@ -120,6 +122,7 @@ JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE = (
     "Additional criteria the user added mid-loop (all must also be "
     "satisfied for the goal to be DONE):\n{subgoals_block}\n\n"
     "Agent's most recent response:\n{response}\n\n"
+    "Current time: {current_time}\n\n"
     "Decision: For each numbered criterion above, find concrete "
     "evidence in the agent's response that the criterion is "
     "satisfied. Do not accept generic phrases like 'all requirements "
@@ -415,6 +418,7 @@ def judge_goal(
 
     # Build the prompt — pick the with-subgoals variant when applicable.
     clean_subgoals = [s.strip() for s in (subgoals or []) if s and s.strip()]
+    current_time = datetime.now(tz=timezone.utc).astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")
     if clean_subgoals:
         subgoals_block = "\n".join(
             f"- {i}. {text}" for i, text in enumerate(clean_subgoals, start=1)
@@ -423,11 +427,13 @@ def judge_goal(
             goal=_truncate(goal, 2000),
             subgoals_block=_truncate(subgoals_block, 2000),
             response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
+            current_time=current_time,
         )
     else:
         prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
             goal=_truncate(goal, 2000),
             response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
+            current_time=current_time,
         )
 
     try:

From 7a7e78a360464b30ba9e9a20525681977b0f2095 Mon Sep 17 00:00:00 2001
From: pr7426 <pr7426@users.noreply.github.com>
Date: Sun, 17 May 2026 02:15:45 +0800
Subject: [PATCH 205/218] fix(cron): prevent parallel job result loss on
 exception

Replace generator-based result collection with explicit per-future
handling. Each future is now processed independently with a 600s timeout.

Before: _results.extend(f.result() for f in _futures)
- One exception stops the generator, remaining results are lost
- No timeout: one hung job blocks the entire tick

After: as_completed() + per-future try/except
- Each future handled independently
- 600s timeout prevents indefinite blocking
- Failed futures are logged and counted as failures
---
 cron/scheduler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/cron/scheduler.py b/cron/scheduler.py
index d470e8c2c74..322fa64906f 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -1802,7 +1802,12 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                 for job in parallel_jobs:
                     _ctx = contextvars.copy_context()
                     _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
-                _results.extend(f.result() for f in _futures)
+                for f in concurrent.futures.as_completed(_futures, timeout=600):
+                    try:
+                        _results.append(f.result())
+                    except Exception as exc:
+                        logger.error("Parallel cron job future failed: %s", exc)
+                        _results.append(False)
 
         # Best-effort sweep of MCP stdio subprocesses that survived their
         # session teardown during this tick.  Runs AFTER every job has

From a52f014a8cdefb72d81ca0e1d1208571dc3512d2 Mon Sep 17 00:00:00 2001
From: Rahul <rahulnilvan43@gmail.com>
Date: Fri, 15 May 2026 13:45:07 +0530
Subject: [PATCH 206/218] fix(tests): mock keychain in
 TestReadClaudeCodeCredentials to prevent credential leakage

Tests in TestReadClaudeCodeCredentials were not mocking
_read_claude_code_credentials_from_keychain, which was added after the
tests were written. On macOS machines with real Claude Code credentials
stored in the Keychain, the function returns live credentials instead of
the test fixtures, causing assertions to fail and leaking real tokens in
test output.

Add an autouse fixture that stubs the keychain reader to None so all
tests in the class exercise only the file-based credential path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/agent/test_anthropic_adapter.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index 0ba2ba29f51..259e9c1c523 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -157,6 +157,13 @@ class TestBuildAnthropicClient:
 
 
 class TestReadClaudeCodeCredentials:
+    @pytest.fixture(autouse=True)
+    def no_keychain(self, monkeypatch):
+        monkeypatch.setattr(
+            "agent.anthropic_adapter._read_claude_code_credentials_from_keychain",
+            lambda: None,
+        )
+
     def test_reads_valid_credentials(self, tmp_path, monkeypatch):
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)

From 8973b00ff3665a76b69ca17e57c8cd1a39b32d53 Mon Sep 17 00:00:00 2001
From: flamiinngo <kingsleyemeka117@gmail.com>
Date: Sun, 17 May 2026 02:10:50 +0100
Subject: [PATCH 207/218] fix(scripts): fix UnicodeEncodeError in footgun
 checker on Windows

The check-windows-footguns.py script outputs a checkmark (U+2713) and
cross (U+2717) to report results. Windows terminals default to cp1252,
which cannot encode these characters, so running the script on Windows
threw a UnicodeEncodeError before any results were printed.

This made the tool completely unusable on the exact platform it exists
to help -- a developer on Windows trying to check their code for
Windows-safety issues would just get a crash instead.

Fix: reconfigure stdout and stderr to UTF-8 at the start of main(),
before any output is produced. Verified on Windows 11 Home with
Python 3.13 (terminal defaulting to cp1252).
---
 scripts/check-windows-footguns.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/scripts/check-windows-footguns.py b/scripts/check-windows-footguns.py
index f424be90710..7ae7ca50c4e 100644
--- a/scripts/check-windows-footguns.py
+++ b/scripts/check-windows-footguns.py
@@ -551,6 +551,14 @@ def print_rules() -> None:
 
 
 def main(argv: list[str]) -> int:
+    # Windows terminals default to cp1252, which can't encode the ✓/✗
+    # characters used in the output. Reconfigure streams to UTF-8 so the
+    # script works correctly on the very platform it is designed to help.
+    if hasattr(sys.stdout, "reconfigure"):
+        sys.stdout.reconfigure(encoding="utf-8")
+    if hasattr(sys.stderr, "reconfigure"):
+        sys.stderr.reconfigure(encoding="utf-8")
+
     args = parse_args(argv)
 
     if args.list:

From 04bb30730a66ff17fe3dcb509d6fd572da3eb014 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 23:04:09 -0700
Subject: [PATCH 208/218] chore(release): AUTHOR_MAP entries for batch salvage
 group 3 contributors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds release-note attribution mappings for 9 contributors from group 3:
- @darvsum (PR #26766)
- @hueilau (PR #26498)
- @Timur00Kh (PR #27114)
- @Grogger (PR #27061)
- @lemassykoi (PR #27042)
- @draplater (PR #26707)
- @pr7426 (PR #27048)
- @therahul-yo (PR #26215)
- @flamiinngo (PR #27205)

#27154 dropped from this batch — already landed on main as 4e9cedcd4.
---
 scripts/release.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 6bbc2ad4ae3..52da4c2f4b7 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1113,6 +1113,19 @@ AUTHOR_MAP = {
     "hermesagent26@gmail.com": "hermesagent26",  # PR #26438 (kimi model-name reasoning pad)
     "276067471+hermesagent26@users.noreply.github.com": "hermesagent26",
     "71590782+kriscolab@users.noreply.github.com": "kriscolab",  # PR #26926 (deepseek default_aux_model)
+    # batch salvage (May 2026 LHF run, group 3)
+    "darvsum@users.noreply.github.com": "darvsum",  # PR #26766 (preserve discover_models in normalize)
+    "peter@Peters-Mac-mini.local": "hueilau",  # PR #26498 (strip image parts for non-vision)
+    "33933019+hueilau@users.noreply.github.com": "hueilau",
+    "32297275+Timur00Kh@users.noreply.github.com": "Timur00Kh",  # PR #27114 (telegram DM topic for synthetic events)
+    "al.bellemare@gmail.com": "Grogger",  # PR #27061 (windows console flash suppress)
+    "clement@nousresearch.com": "lemassykoi",  # PR #27042 (model-switch probe keyless providers)
+    "16377344+lemassykoi@users.noreply.github.com": "lemassykoi",
+    "draplater@icloud.com": "draplater",  # PR #26707 (goal judge current time)
+    "6349758+draplater@users.noreply.github.com": "draplater",
+    "pr7426@users.noreply.github.com": "pr7426",  # PR #27048 (cron parallel job loss)
+    "rahulnilvan43@gmail.com": "therahul-yo",  # PR #26215 (mock keychain in tests)
+    "kingsleyemeka117@gmail.com": "flamiinngo",  # PR #27205 (UnicodeEncodeError footgun checker)
 }
 
 
From c1ae18ee815eba605c1b021e1b0b2a9c765b2d71 Mon Sep 17 00:00:00 2001
From: EloquentBrush0x <283442588+EloquentBrush0x@users.noreply.github.com>
Date: Sat, 16 May 2026 02:59:39 +0300
Subject: [PATCH 209/218] fix(gateway): add trust_env=True to aiohttp sessions
 in SMS, Slack, Teams, Google Chat adapters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

aiohttp.ClientSession defaults to trust_env=False, which silently ignores
HTTP_PROXY, HTTPS_PROXY, and ALL_PROXY environment variables. Users behind
a corporate or network proxy cannot reach external APIs on any of these
platforms — all outbound requests fail with connection errors.

Symmetric with wecom.py (line 276), weixin.py (lines 1055/1268/1274), and
matrix.py (no-proxy path) which already set this flag. Complements the
open LINE fix (#26635) with the remaining gateway and plugin adapters.

Changed:
- gateway/platforms/sms.py: persistent Twilio session (connect) + fallback
  session (send) — both hit https://api.twilio.com
- gateway/platforms/slack.py: ephemeral response_url POST session —
  hits https://hooks.slack.com/... callback URLs
- plugins/platforms/teams/adapter.py: standalone send session —
  hits login.microsoftonline.com (token) + Bot Framework service URL
- plugins/platforms/google_chat/adapter.py: standalone send session —
  hits https://chat.googleapis.com/v1/...

WhatsApp sessions are excluded: they connect to http://127.0.0.1:{port}
(local bridge) and must not be routed through a system proxy.
---
 gateway/platforms/slack.py               | 2 +-
 gateway/platforms/sms.py                 | 2 ++
 plugins/platforms/google_chat/adapter.py | 2 +-
 plugins/platforms/teams/adapter.py       | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py
index 2116b569f96..5accfdb4108 100644
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@@ -482,7 +482,7 @@ class SlackAdapter(BasePlatformAdapter):
             "text": text,
         }
         try:
-            async with aiohttp.ClientSession() as session:
+            async with aiohttp.ClientSession(trust_env=True) as session:
                 async with session.post(
                     ctx["response_url"],
                     json=payload,
diff --git a/gateway/platforms/sms.py b/gateway/platforms/sms.py
index 2cf7db69b74..9d9957d5ea1 100644
--- a/gateway/platforms/sms.py
+++ b/gateway/platforms/sms.py
@@ -128,6 +128,7 @@ class SmsAdapter(BasePlatformAdapter):
         await site.start()
         self._http_session = aiohttp.ClientSession(
             timeout=aiohttp.ClientTimeout(total=30),
+            trust_env=True,
         )
         self._running = True
 
@@ -169,6 +170,7 @@ class SmsAdapter(BasePlatformAdapter):
 
         session = self._http_session or aiohttp.ClientSession(
             timeout=aiohttp.ClientTimeout(total=30),
+            trust_env=True,
         )
         try:
             for chunk in chunks:
diff --git a/plugins/platforms/google_chat/adapter.py b/plugins/platforms/google_chat/adapter.py
index d8777bf7101..1520d6664eb 100644
--- a/plugins/platforms/google_chat/adapter.py
+++ b/plugins/platforms/google_chat/adapter.py
@@ -3246,7 +3246,7 @@ async def _standalone_send(
         return {"error": "Google Chat standalone send: aiohttp not installed"}
 
     try:
-        async with _aiohttp.ClientSession(timeout=_aiohttp.ClientTimeout(total=30.0)) as session:
+        async with _aiohttp.ClientSession(timeout=_aiohttp.ClientTimeout(total=30.0), trust_env=True) as session:
             async with session.post(
                 url,
                 json=body,
diff --git a/plugins/platforms/teams/adapter.py b/plugins/platforms/teams/adapter.py
index c71baeb9d93..f8a1dc3d5b4 100644
--- a/plugins/platforms/teams/adapter.py
+++ b/plugins/platforms/teams/adapter.py
@@ -566,7 +566,7 @@ async def _standalone_send(
         # Per-request timeouts so a slow STS endpoint cannot starve the
         # subsequent activity POST of its budget.
         per_request_timeout = _aiohttp.ClientTimeout(total=15.0)
-        async with _aiohttp.ClientSession() as session:
+        async with _aiohttp.ClientSession(trust_env=True) as session:
             async with session.post(
                 token_url,
                 data={

From fdd455bc58b8708eb2c7e3e5d83efca3ec49e4a4 Mon Sep 17 00:00:00 2001
From: subtract0 <205509009+subtract0@users.noreply.github.com>
Date: Sat, 16 May 2026 23:09:31 -0700
Subject: [PATCH 210/218] fix(gateway): avoid zsh status variable in update
 wrapper

---
 gateway/run.py                         | 6 +++++-
 tests/gateway/test_update_streaming.py | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gateway/run.py b/gateway/run.py
index 56185190e26..81ce914b8ab 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -12837,7 +12837,11 @@ class GatewayRunner:
                 update_cmd = (
                     f"PYTHONUNBUFFERED=1 {hermes_cmd_str} update --gateway"
                     f" > {shlex.quote(str(output_path))} 2>&1; "
-                    f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
+                    # Avoid `status=$?`: `status` is a read-only special parameter
+                    # in zsh, and this command string is copied/reused in macOS/zsh
+                    # operator wrappers. Keep the template zsh-safe even though this
+                    # specific subprocess currently runs under bash.
+                    f"rc=$?; printf '%s' \"$rc\" > {shlex.quote(str(exit_code_path))}"
                 )
                 setsid_bin = shutil.which("setsid")
                 if setsid_bin:
diff --git a/tests/gateway/test_update_streaming.py b/tests/gateway/test_update_streaming.py
index 932bd1b0579..eb0f0cfa890 100644
--- a/tests/gateway/test_update_streaming.py
+++ b/tests/gateway/test_update_streaming.py
@@ -237,6 +237,8 @@ class TestUpdateCommandGatewayFlag:
         cmd_string = call_args[-1] if isinstance(call_args, list) else str(call_args)
         assert "--gateway" in cmd_string
         assert "PYTHONUNBUFFERED" in cmd_string
+        assert "rc=$?" in cmd_string
+        assert "status=$?" not in cmd_string
         assert "stream progress" in result
 
 
From 364a1dd290245093f76837c6074bb7d4fdc798c6 Mon Sep 17 00:00:00 2001
From: zwolniony <12735938+zwolniony@users.noreply.github.com>
Date: Sat, 16 May 2026 23:09:31 -0700
Subject: [PATCH 211/218] Local: doctor uses x-goog-api-key for Google
 generativelanguage endpoint

---
 hermes_cli/doctor.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 9d3b6e3c01a..07aaa2e38bc 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -1474,6 +1474,15 @@ def run_doctor(args):
             }
             if base_url_host_matches(base, "api.kimi.com"):
                 headers["User-Agent"] = "claude-code/0.1.0"
+            # Google's Generative Language API (generativelanguage.googleapis.com)
+            # rejects ``Authorization: Bearer <api-key>`` with 401
+            # ``ACCESS_TOKEN_TYPE_UNSUPPORTED`` — that header is reserved for
+            # OAuth 2 access tokens, not plain API keys. Plain keys use
+            # ``x-goog-api-key`` (or ``?key=``). Without this, a perfectly valid
+            # GOOGLE_API_KEY/GEMINI_API_KEY always shows red in ``hermes doctor``.
+            if url and base_url_host_matches(url, "generativelanguage.googleapis.com"):
+                headers.pop("Authorization", None)
+                headers["x-goog-api-key"] = key
             r = httpx.get(url, headers=headers, timeout=10)
             if (
                 pname == "Alibaba/DashScope"

From a3017508bf88e663c318495d191904020f77a0f5 Mon Sep 17 00:00:00 2001
From: Ambuj Kumar <ambuj@dodopayments.com>
Date: Sat, 16 May 2026 02:23:25 +0530
Subject: [PATCH 212/218] fix(gateway): preserve underscores in plain-text
 identifiers

---
 gateway/platforms/helpers.py      | 4 ++--
 tests/gateway/test_bluebubbles.py | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gateway/platforms/helpers.py b/gateway/platforms/helpers.py
index 1c4f451585a..a3704bf50cf 100644
--- a/gateway/platforms/helpers.py
+++ b/gateway/platforms/helpers.py
@@ -168,8 +168,8 @@ class TextBatchAggregator:
 # Pre-compiled regexes for performance
 _RE_BOLD = re.compile(r"\*\*(.+?)\*\*", re.DOTALL)
 _RE_ITALIC_STAR = re.compile(r"\*(.+?)\*", re.DOTALL)
-_RE_BOLD_UNDER = re.compile(r"__(.+?)__", re.DOTALL)
-_RE_ITALIC_UNDER = re.compile(r"_(.+?)_", re.DOTALL)
+_RE_BOLD_UNDER = re.compile(r"\b__(?![\s_])(.+?)(?<![\s_])__\b", re.DOTALL)
+_RE_ITALIC_UNDER = re.compile(r"\b_(?![\s_])(.+?)(?<![\s_])_\b", re.DOTALL)
 _RE_CODE_BLOCK = re.compile(r"```[a-zA-Z0-9_+-]*\n?")
 _RE_INLINE_CODE = re.compile(r"`(.+?)`")
 _RE_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
diff --git a/tests/gateway/test_bluebubbles.py b/tests/gateway/test_bluebubbles.py
index e3ff26cc695..6f93c1d4dba 100644
--- a/tests/gateway/test_bluebubbles.py
+++ b/tests/gateway/test_bluebubbles.py
@@ -101,6 +101,11 @@ class TestBlueBubblesHelpers:
         adapter = _make_adapter(monkeypatch)
         assert adapter.format_message("**Hello** `world`") == "Hello world"
 
+    def test_format_message_preserves_underscores_in_identifiers(self, monkeypatch):
+        adapter = _make_adapter(monkeypatch)
+        text = "Use /api_v2 with FEATURE_FLAG_NAME and config_file.json"
+        assert adapter.format_message(text) == text
+
     def test_strip_markdown_headers(self, monkeypatch):
         adapter = _make_adapter(monkeypatch)
         assert adapter.format_message("## Heading\ntext") == "Heading\ntext"

From 0afab4a32b3b371ac3b5ab17d745aab823444ae3 Mon Sep 17 00:00:00 2001
From: Franci Penov <francip@gmail.com>
Date: Thu, 14 May 2026 22:37:51 -0700
Subject: [PATCH 213/218] feat(gateway): extract auto-TTS markdown strip into
 prepare_tts_text() hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor the inlined `re.sub(...)[:4000].strip()` cleanup at the
auto-TTS site in `_process_message_background` into an overridable
method `BasePlatformAdapter.prepare_tts_text(text: str) -> str`.

The default implementation is byte-identical to the previous inline
expression — strip `* _ \` # [ ] ( )` and truncate to 4000 chars — so
every existing adapter (Telegram, Discord, Slack, Matrix, IRC, etc.)
gets exactly the same behaviour as before. Zero behaviour change for
any consumer that doesn't override the method.

Why add the hook: voice-first platform adapters need stricter
cleanup than text-bubble platforms. The default strips a handful of
markdown sigils, which is fine when the output goes into a Discord
embed or a Telegram message bubble — but read aloud by a TTS engine,
URLs (`https://example.com/foo`), fenced code blocks, file paths
(`/Users/x/foo.py`), and `MEDIA:` tags turn into long sequences of
unintelligible characters. With this hook an adapter can drop those
spans before TTS while leaving the data-channel transcript intact
for visual rendering.

Without the hook, voice adapters have to either
  - duplicate the auto-TTS flow inside their own `handle_response`
    pipeline, which means re-implementing the entire `extract_media`,
    `extract_images`, `extract_local_files`, attachment routing and
    error-handling sequence in `_process_message_background`, or
  - live with TTS speaking URLs character-by-character.

Both are worse than a 7-line method addition.

Example consumer:
  https://github.com/kortexa-ai/hermes-livekit — LiveKit WebRTC voice
  gateway plugin. Its `LiveKitAdapter.prepare_tts_text()` additionally
  strips fenced code blocks, inline code, URLs, file paths, and
  `MEDIA:` tags before TTS synthesis, while the full response still
  reaches connected clients via the data channel. Drop-in installable
  via `pip install git+https://github.com/kortexa-ai/hermes-livekit.git`.

Carved out of #3894 (LiveKit WebRTC gateway PR) so the generic hook
can land independently of the LiveKit platform itself.
---
 gateway/platforms/base.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 7b3147e21f4..96b56d29cc7 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -2014,6 +2014,13 @@ class BasePlatformAdapter(ABC):
             text = f"{caption}\n{text}"
         return await self.send(chat_id=chat_id, content=text, reply_to=reply_to, metadata=metadata)
 
+    def prepare_tts_text(self, text: str) -> str:
+        """Prepare text for TTS. Override to filter tool output, code, etc.
+
+        Default strips markdown formatting and truncates to 4000 chars.
+        """
+        return re.sub(r'[*_`#\[\]()]', '', text)[:4000].strip()
+
     async def play_tts(
         self,
         chat_id: str,
@@ -3144,7 +3151,7 @@ class BasePlatformAdapter(ABC):
                         from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                         if check_tts_requirements():
                             import json as _json
-                            speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
+                            speech_text = self.prepare_tts_text(text_content)
                             if not speech_text:
                                 raise ValueError("Empty text after markdown cleanup")
                             tts_result_str = await asyncio.to_thread(

From b389796ae3a33256ff1b4077acc1169831fb63e1 Mon Sep 17 00:00:00 2001
From: zccyman <zccyman@163.com>
Date: Thu, 14 May 2026 07:49:52 +0800
Subject: [PATCH 214/218] fix(auxiliary): resolve api_key_env alias in named
 custom provider path of resolve_provider_client
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In resolve_provider_client(), the named custom provider code path at
~line 2914 only checked the ``key_env`` field when looking for an
environment-variable-based API key. The documented ``api_key_env``
snake_case alias was silently ignored, causing custom providers
configured with ``api_key_env`` to fall through to the
``no-key-required`` placeholder — which produces a confusing 401
(``****ired`` mask) on auth-required remote endpoints.

This mirrors the same fix already applied to run_agent.py in commit
6ddc48b05 (fix(fallback): resolve api_key_env in fallback chain entries).

Also adds a logger.warning() when the placeholder is reached, so
future alias gaps are easier to debug.

Closes #25091
---
 agent/auxiliary_client.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index cfc44e5f2a6..102ff79f1ce 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -3049,10 +3049,17 @@ def resolve_provider_client(
         if custom_entry:
             custom_base = custom_entry.get("base_url", "").strip()
             custom_key = custom_entry.get("api_key", "").strip()
-            custom_key_env = custom_entry.get("key_env", "").strip()
+            custom_key_env = (custom_entry.get("key_env") or custom_entry.get("api_key_env") or "").strip()
             if not custom_key and custom_key_env:
                 custom_key = os.getenv(custom_key_env, "").strip()
             custom_key = custom_key or "no-key-required"
+            if custom_key == "no-key-required":
+                logger.warning(
+                    "resolve_provider_client: named custom provider %r has no resolvable "
+                    "api_key — request will be sent with placeholder no-key-required "
+                    "and will 401 on auth-required endpoints",
+                    custom_entry.get("name") or provider,
+                )
             # An explicit per-task api_mode override (from _resolve_task_provider_model)
             # wins; otherwise fall back to what the provider entry declared.
             entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()

From 5631345b12aa5fa7ead11203624e646b42c8936f Mon Sep 17 00:00:00 2001
From: bitkyc08-arch <bitkyc08@gmail.com>
Date: Sat, 16 May 2026 16:41:03 +0900
Subject: [PATCH 215/218] [agent] fix: harden api server response headers

---
 gateway/platforms/api_server.py  | 5 +++++
 tests/gateway/test_api_server.py | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index ebd4f014690..0668896e170 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -510,7 +510,12 @@ else:
     body_limit_middleware = None  # type: ignore[assignment]
 
 _SECURITY_HEADERS = {
+    "Content-Security-Policy": "default-src 'none'; frame-ancestors 'none'",
+    "Permissions-Policy": "camera=(), microphone=(), geolocation=()",
+    "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
     "X-Content-Type-Options": "nosniff",
+    "X-Frame-Options": "DENY",
+    "X-XSS-Protection": "0",
     "Referrer-Policy": "no-referrer",
 }
 
diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py
index 7d08d64bb32..aae5f550532 100644
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -445,7 +445,12 @@ class TestHealthEndpoint:
         async with TestClient(TestServer(app)) as cli:
             resp = await cli.get("/health")
             assert resp.status == 200
+            assert resp.headers.get("Content-Security-Policy") == "default-src 'none'; frame-ancestors 'none'"
+            assert resp.headers.get("Permissions-Policy") == "camera=(), microphone=(), geolocation=()"
+            assert resp.headers.get("Strict-Transport-Security") == "max-age=31536000; includeSubDomains"
             assert resp.headers.get("X-Content-Type-Options") == "nosniff"
+            assert resp.headers.get("X-Frame-Options") == "DENY"
+            assert resp.headers.get("X-XSS-Protection") == "0"
             assert resp.headers.get("Referrer-Policy") == "no-referrer"
 
     @pytest.mark.asyncio

From 52c89715a29198d838dac54e229aba9cf328e408 Mon Sep 17 00:00:00 2001
From: phoenixshen <1594534+phoenixshen@users.noreply.github.com>
Date: Sat, 16 May 2026 23:09:31 -0700
Subject: [PATCH 216/218] fix: respect user-configured vision model for
 OpenRouter

_OPENROUTER_MODEL hardcoded 'google/gemini-3-flash-preview' which
returns 404 on OpenRouter, breaking all vision tasks for users who
rely on the OpenRouter default.  Additionally, _try_openrouter()
ignored the user-configured auxiliary.vision.model entirely.

Changes:
- Update _OPENROUTER_MODEL default to google/gemini-2.5-flash (valid)
- Add optional 'model' parameter to _try_openrouter()
- Pass configured model from _resolve_strict_vision_backend() through
  to _try_openrouter()

This allows users who set auxiliary.vision.model (e.g. x-ai/grok-4.3)
to have it actually used, while maintaining backward compatibility.
---
 agent/auxiliary_client.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 102ff79f1ce..e02fa1911f7 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -424,7 +424,7 @@ NOUS_EXTRA_BODY = _nous_extra_body()
 auxiliary_is_nous: bool = False
 
 # Default auxiliary models per provider
-_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
+_OPENROUTER_MODEL = "google/gemini-2.5-flash"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
@@ -1473,7 +1473,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
 
 
-def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
     pool_present, entry = _select_pool_entry("openrouter")
     if pool_present:
         or_key = explicit_api_key or _pool_runtime_api_key(entry)
@@ -1483,7 +1483,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
         base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
         logger.debug("Auxiliary client: OpenRouter via pool")
         return OpenAI(api_key=or_key, base_url=base_url,
-                       default_headers=build_or_headers()), _OPENROUTER_MODEL
+                       default_headers=build_or_headers()), model or _OPENROUTER_MODEL
 
     or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
     if not or_key:
@@ -1491,7 +1491,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
         return None, None
     logger.debug("Auxiliary client: OpenRouter")
     return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
-                   default_headers=build_or_headers()), _OPENROUTER_MODEL
+                   default_headers=build_or_headers()), model or _OPENROUTER_MODEL
 
 
 def _describe_openrouter_unavailable() -> str:
@@ -3407,7 +3407,7 @@ def _resolve_strict_vision_backend(
     if provider == "copilot":
         return resolve_provider_client("copilot", model, is_vision=True)
     if provider == "openrouter":
-        return _try_openrouter()
+        return _try_openrouter(model=model)
     if provider == "nous":
         return _try_nous(vision=True)
     if provider == "openai-codex":

From 35b7befc67315da5d4ce6b6a3daa4d9ba2f57c1c Mon Sep 17 00:00:00 2001
From: AhmetArif0 <147827411+AhmetArif0@users.noreply.github.com>
Date: Sat, 16 May 2026 02:06:31 +0300
Subject: [PATCH 217/218] fix(line): add trust_env=True to all _LineClient
 aiohttp sessions

_LineClient's five aiohttp.ClientSession() calls omit trust_env=True,
silently bypassing HTTP_PROXY / HTTPS_PROXY / ALL_PROXY. Result: every
LINE API call (reply, push, loading, fetch_content, get_bot_user_id)
ignores the system proxy.

Fix: add trust_env=True to all five session constructions. Symmetric
with the wecom and weixin adapters which already set this flag. No
behavior change for users not behind a proxy.
---
 plugins/platforms/line/adapter.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/plugins/platforms/line/adapter.py b/plugins/platforms/line/adapter.py
index db5d3564d32..907f16be4ff 100644
--- a/plugins/platforms/line/adapter.py
+++ b/plugins/platforms/line/adapter.py
@@ -447,7 +447,7 @@ class _LineClient:
     async def reply(self, reply_token: str, messages: List[Dict[str, Any]]) -> None:
         import aiohttp
         timeout = aiohttp.ClientTimeout(total=self._timeout)
-        async with aiohttp.ClientSession(timeout=timeout) as session:
+        async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
             async with session.post(
                 LINE_REPLY_URL,
                 headers=self._headers,
@@ -460,7 +460,7 @@ class _LineClient:
     async def push(self, chat_id: str, messages: List[Dict[str, Any]]) -> None:
         import aiohttp
         timeout = aiohttp.ClientTimeout(total=self._timeout)
-        async with aiohttp.ClientSession(timeout=timeout) as session:
+        async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
             async with session.post(
                 LINE_PUSH_URL,
                 headers=self._headers,
@@ -479,7 +479,7 @@ class _LineClient:
         clamped = max(5, min(60, (seconds // 5) * 5 or 5))
         try:
             timeout = aiohttp.ClientTimeout(total=5.0)
-            async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
                 await session.post(
                     LINE_LOADING_URL,
                     headers=self._headers,
@@ -493,7 +493,7 @@ class _LineClient:
         import aiohttp
         url = LINE_CONTENT_URL_FMT.format(message_id=message_id)
         timeout = aiohttp.ClientTimeout(total=30.0)
-        async with aiohttp.ClientSession(timeout=timeout) as session:
+        async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
             async with session.get(url, headers={"Authorization": f"Bearer {self._token}"}) as resp:
                 if resp.status >= 400:
                     raise RuntimeError(f"LINE content {resp.status}")
@@ -504,7 +504,7 @@ class _LineClient:
         import aiohttp
         timeout = aiohttp.ClientTimeout(total=10.0)
         try:
-            async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
                 async with session.get(LINE_BOT_INFO_URL, headers=self._headers) as resp:
                     if resp.status >= 400:
                         return None

From 7322816efa601737722c74147194f1f5ffd3ad07 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 16 May 2026 23:10:34 -0700
Subject: [PATCH 218/218] chore(release): AUTHOR_MAP entries for batch salvage
 group 4 contributors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds release-note attribution mappings for 9 contributors from group 4:
- @EloquentBrush0x (PR #26657)
- @subtract0 (PR #25658)
- @zwolniony (PR #26961)
- @that-ambuj (PR #26582)
- @zccyman (PR #25294)
- @lidge-jun (PR #26814)
- @phoenixshen (PR #26768)
- @AhmetArif0 (PR #26635)
- (francip already mapped from prior PR #26134 attribution)

#27147 dropped from this batch — already landed on main as 4b17c2411.
---
 scripts/release.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 52da4c2f4b7..c388116cff6 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1126,6 +1126,17 @@ AUTHOR_MAP = {
     "pr7426@users.noreply.github.com": "pr7426",  # PR #27048 (cron parallel job loss)
     "rahulnilvan43@gmail.com": "therahul-yo",  # PR #26215 (mock keychain in tests)
     "kingsleyemeka117@gmail.com": "flamiinngo",  # PR #27205 (UnicodeEncodeError footgun checker)
+    # batch salvage (May 2026 LHF run, group 4)
+    "283442588+EloquentBrush0x@users.noreply.github.com": "EloquentBrush0x",  # PR #26657 (trust_env aiohttp)
+    "205509009+subtract0@users.noreply.github.com": "subtract0",  # PR #25658 (zsh $status -> $rc)
+    "patryk@jarmakowicz.me": "zwolniony",  # PR #26961 (gemini x-goog-api-key)
+    "12735938+zwolniony@users.noreply.github.com": "zwolniony",
+    "ambuj@dodopayments.com": "that-ambuj",  # PR #26582 (preserve underscores)
+    "zccyman@163.com": "zccyman",  # PR #25294 (custom provider api_key_env alias)
+    "bitkyc08@gmail.com": "lidge-jun",  # PR #26814 (api server browser security headers)
+    "sp_ps@Mac-mini.lan": "phoenixshen",  # PR #26768 (respect user-configured vision model)
+    "1594534+phoenixshen@users.noreply.github.com": "phoenixshen",
+    "147827411+AhmetArif0@users.noreply.github.com": "AhmetArif0",  # PR #26635 (line proxy env vars)
 }