Merge branch 'main' into docker_s6

2026-06-08 08:11:38 +00:00 · 2026-05-25 09:39:27 +10:00 · 2026-05-25 09:39:27 +10:00 · 59da190512
commit 59da190512
parent 0988ab83b7 9c08070703
417 changed files with 26434 additions and 3321 deletions
--- a/tests/tools/conftest.py
+++ b/tests/tools/conftest.py
@ -8,6 +8,8 @@ depend on the registry being populated should use it explicitly or via
 ``@pytest.mark.usefixtures("web_registry_populated")``.
 """

+from unittest.mock import patch
+
 import pytest


@ -48,3 +50,20 @@ def web_registry_populated():
    yield
    from agent.web_search_registry import _reset_for_tests
    _reset_for_tests()
+
+
+@pytest.fixture
+def disable_lazy_stt_install():
+    """Disarm the runtime lazy-install probe so static ``_HAS_FASTER_WHISPER``
+    patches accurately simulate 'faster-whisper not installed'.
+
+    Without this, ``_try_lazy_install_stt()`` calls
+    ``importlib.util.find_spec("faster_whisper")``, which returns truthy
+    whenever the package is installed in the dev / CI environment —
+    defeating the test's ``_HAS_FASTER_WHISPER=False`` patch.
+
+    Opt in at module scope with
+    ``pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")``.
+    """
+    with patch("tools.transcription_tools._try_lazy_install_stt", return_value=False):
+        yield
--- a/tests/tools/test_approval.py
+++ b/tests/tools/test_approval.py
@ -1,6 +1,9 @@
 """Tests for the dangerous command approval module."""

 import ast
+import os
+import threading
+import time
 from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import patch as mock_patch
@ -1305,3 +1308,165 @@ class TestEtcPatternsUnaffectedByRefactor:
    def test_grep_etc_passwd_is_safe(self):
        dangerous, _, _ = detect_dangerous_command("grep root /etc/passwd")
        assert dangerous is False
+
+
+# =========================================================================
+# Gateway approval timeout = deny, NOT consent (#24912)
+#
+# A Slack user walked away mid-conversation; the agent requested approval
+# to run `rm -rf .git`; the prompt timed out; the agent ran the command
+# anyway. Reported by @tofalck on 2026-05-13, corroborated by
+# @angry-programmer on Telegram. Silence is not consent.
+#
+# These tests pin:
+#   1. Gateway timeout → approved=False, with a message strong enough that
+#      a downstream agent reading "BLOCKED: ... Silence is not consent."
+#      treats it as a hard halt, not an invitation to rephrase.
+#   2. The structured outcome / user_consent fields are present so
+#      plugins, hooks, and audit pipelines can act on the timeout without
+#      string-parsing the message.
+#   3. Explicit /deny carries the same shape (treat-as-not-consented).
+# =========================================================================
+
+
+class TestApprovalTimeoutIsNotConsent:
+    """The gateway approval contract: silence is not consent (#24912)."""
+
+    SESSION_KEY = "test-no-consent-session"
+
+    def setup_method(self):
+        """Reset module state and force tight gateway_timeout for fast tests."""
+        from tools import approval as mod
+        mod._gateway_queues.clear()
+        mod._gateway_notify_cbs.clear()
+        mod._session_approved.clear()
+        mod._permanent_approved.clear()
+        mod._pending.clear()
+
+        self._saved_env = {
+            k: os.environ.get(k)
+            for k in ("HERMES_GATEWAY_SESSION", "HERMES_YOLO_MODE",
+                      "HERMES_SESSION_KEY", "HERMES_INTERACTIVE")
+        }
+        os.environ.pop("HERMES_YOLO_MODE", None)
+        os.environ.pop("HERMES_INTERACTIVE", None)
+        os.environ["HERMES_GATEWAY_SESSION"] = "1"
+        os.environ["HERMES_SESSION_KEY"] = self.SESSION_KEY
+
+    def teardown_method(self):
+        from tools import approval as mod
+        mod._gateway_queues.clear()
+        mod._gateway_notify_cbs.clear()
+        for k, v in self._saved_env.items():
+            if v is None:
+                os.environ.pop(k, None)
+            else:
+                os.environ[k] = v
+
+    def _force_short_timeout(self, monkeypatch, seconds=1):
+        from tools import approval as mod
+        monkeypatch.setattr(
+            mod, "_get_approval_config",
+            lambda: {"mode": "manual", "gateway_timeout": seconds, "timeout": seconds},
+        )
+
+    def test_timeout_returns_approved_false_with_no_consent(self, monkeypatch):
+        """The reported #24912 scenario — user never responds, agent must see BLOCKED."""
+        from tools import approval as mod
+
+        self._force_short_timeout(monkeypatch, seconds=1)
+
+        # Slack-shaped: notify_cb registered, but user doesn't respond.
+        notified = []
+        mod.register_gateway_notify(self.SESSION_KEY, lambda data: notified.append(data))
+
+        result = mod.check_all_command_guards("rm -rf .git", "local")
+
+        assert result["approved"] is False
+        assert result.get("user_consent") is False
+        assert result.get("outcome") == "timeout"
+        # The notify_cb DID fire — we did try to ask the user.
+        assert len(notified) == 1
+
+    def test_timeout_message_is_emphatic_against_retry_and_rephrase(self, monkeypatch):
+        """The BLOCKED message must explicitly tell the agent not to rephrase.
+
+        Without this, the agent treats 'Do NOT retry this command' as
+        permission to try a different command achieving the same outcome.
+        """
+        from tools import approval as mod
+        self._force_short_timeout(monkeypatch, seconds=1)
+        mod.register_gateway_notify(self.SESSION_KEY, lambda data: None)
+
+        result = mod.check_all_command_guards("rm -rf .git", "local")
+
+        msg = result["message"]
+        # Explicit halt signals — these are the model-facing contract.
+        assert "BLOCKED" in msg
+        assert "NOT consented" in msg
+        assert "Silence is not consent" in msg
+        # Both forms of evasion must be named:
+        assert "do NOT retry" in msg.lower() or "Do NOT retry" in msg
+        assert "rephrase" in msg.lower()
+        assert "different command" in msg.lower()
+
+    def test_explicit_deny_carries_same_no_consent_shape(self):
+        """An explicit /deny must produce the same shape as timeout —
+        the agent should treat both identically."""
+        from tools import approval as mod
+
+        notified = []
+        mod.register_gateway_notify(self.SESSION_KEY, lambda data: notified.append(data))
+
+        # Spawn the approval wait in a thread, then resolve it with "deny".
+        result_holder = {}
+        def _check():
+            result_holder["r"] = mod.check_all_command_guards("rm -rf .git", "local")
+        t = threading.Thread(target=_check)
+        t.start()
+
+        # Wait for the queue entry to appear, then resolve.
+        for _ in range(50):
+            if mod._gateway_queues.get(self.SESSION_KEY):
+                break
+            time.sleep(0.02)
+        mod.resolve_gateway_approval(self.SESSION_KEY, "deny")
+        t.join(timeout=5)
+        assert "r" in result_holder, "approval wait did not return after deny"
+
+        r = result_holder["r"]
+        assert r["approved"] is False
+        assert r.get("user_consent") is False
+        assert r.get("outcome") == "denied"
+        assert "Silence is not consent" not in r["message"]  # this one IS denied, not timed-out
+        assert "NOT consented" in r["message"]
+        assert "rephrase" in r["message"].lower()
+
+    def test_timeout_emits_post_hook_with_timeout_outcome(self, monkeypatch):
+        """Plugins must be able to distinguish timeout from explicit deny.
+
+        This is what an audit / notification plugin needs to alert
+        operators on 'agent asked, user never replied' incidents like #24912.
+        """
+        from tools import approval as mod
+        self._force_short_timeout(monkeypatch, seconds=1)
+        mod.register_gateway_notify(self.SESSION_KEY, lambda data: None)
+
+        hook_calls = []
+        original_fire = mod._fire_approval_hook
+
+        def _capture(event_name, **kwargs):
+            hook_calls.append((event_name, kwargs))
+            return original_fire(event_name, **kwargs)
+
+        monkeypatch.setattr(mod, "_fire_approval_hook", _capture)
+
+        mod.check_all_command_guards("rm -rf .git", "local")
+
+        # post_approval_response must be in the hook log with choice=timeout
+        posts = [c for c in hook_calls if c[0] == "post_approval_response"]
+        assert posts, "post_approval_response hook did not fire"
+        last_post = posts[-1][1]
+        assert last_post.get("choice") == "timeout", (
+            f"hook choice should be 'timeout' on no-response, got {last_post.get('choice')!r}"
+        )
--- a/tests/tools/test_browser_orphan_reaper.py
+++ b/tests/tools/test_browser_orphan_reaper.py
@ -72,7 +72,7 @@ class TestReapOrphanedBrowserSessions:
        assert not d.exists()

    def test_orphaned_alive_daemon_is_killed(self, fake_tmpdir):
-        """Alive daemon not tracked by _active_sessions gets SIGTERM (legacy path).
+        """Alive daemon not tracked by _active_sessions is terminated (legacy path).

        No owner_pid file => falls back to tracked_names check.
        """
@ -82,18 +82,17 @@ class TestReapOrphanedBrowserSessions:

        kill_calls = []

-        def mock_kill(pid, sig):
-            kill_calls.append((pid, sig))
-            # Don't actually kill anything
+        def mock_terminate(pid):
+            kill_calls.append(pid)

        # Post-#21561 the liveness probe goes through
        # ``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
        # so it's safe on Windows — ``os.kill(pid, 0)`` is bpo-14484).
        with patch("gateway.status._pid_exists", return_value=True), \
-             patch("os.kill", side_effect=mock_kill):
+             patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
            _reap_orphaned_browser_sessions()

-        assert (12345, signal.SIGTERM) in kill_calls
+        assert 12345 in kill_calls

    def test_tracked_session_is_not_reaped(self, fake_tmpdir):
        """Sessions tracked in _active_sessions are left alone (legacy path)."""
@ -108,13 +107,13 @@ class TestReapOrphanedBrowserSessions:

        kill_calls = []

-        def mock_kill(pid, sig):
-            kill_calls.append((pid, sig))
+        def mock_terminate(pid):
+            kill_calls.append(pid)

-        with patch("os.kill", side_effect=mock_kill):
+        with patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
            _reap_orphaned_browser_sessions()

-        # Should NOT have tried to kill anything
+        # Should NOT have tried to terminate anything
        assert len(kill_calls) == 0
        # Dir should still exist
        assert d.exists()
@ -126,23 +125,24 @@ class TestReapOrphanedBrowserSessions:
        ``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
        because ``os.kill(pid, 0)`` is a footgun on Windows — bpo-14484).
        With no owner_pid file and no tracked-name entry, the reaper
-        SIGTERMs the daemon and removes its socket dir regardless of
-        whether SIGTERM succeeded (best-effort semantics).
+        terminates the daemon (and its process tree) and removes its socket
+        dir regardless of whether termination succeeded (best-effort
+        semantics).
        """
        from tools.browser_tool import _reap_orphaned_browser_sessions

        d = _make_socket_dir(fake_tmpdir, "h_perm1234567", pid=12345)

-        sigterm_calls = []
+        terminate_calls = []

-        def mock_kill(pid, sig):
-            sigterm_calls.append((pid, sig))
+        def mock_terminate(pid):
+            terminate_calls.append(pid)

        with patch("gateway.status._pid_exists", return_value=True), \
-             patch("os.kill", side_effect=mock_kill):
+             patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
            _reap_orphaned_browser_sessions()

-        assert (12345, signal.SIGTERM) in sigterm_calls
+        assert 12345 in terminate_calls
        assert not d.exists()

    def test_cdp_sessions_are_also_reaped(self, fake_tmpdir):
@ -203,15 +203,15 @@ class TestOwnerPidCrossProcess:

        kill_calls = []

-        def mock_kill(pid, sig):
-            kill_calls.append((pid, sig))
+        def mock_terminate(pid):
+            kill_calls.append(pid)

        # Owner alive → reaper skips without ever probing the daemon.
        with patch("gateway.status._pid_exists", return_value=True), \
-             patch("os.kill", side_effect=mock_kill):
+             patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
            _reap_orphaned_browser_sessions()

-        assert (12345, signal.SIGTERM) not in kill_calls
+        assert 12345 not in kill_calls
        assert d.exists()

    def test_dead_owner_triggers_reap(self, fake_tmpdir):
@ -225,17 +225,17 @@ class TestOwnerPidCrossProcess:

        kill_calls = []

-        def mock_kill(pid, sig):
-            kill_calls.append((pid, sig))
+        def mock_terminate(pid):
+            kill_calls.append(pid)

        # Owner 999999999 dead, daemon 12345 alive.
        pid_alive = {999999999: False, 12345: True}
        with patch("gateway.status._pid_exists",
                   side_effect=lambda pid: pid_alive.get(int(pid), False)), \
-             patch("os.kill", side_effect=mock_kill):
+             patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
            _reap_orphaned_browser_sessions()

-        assert (12345, signal.SIGTERM) in kill_calls
+        assert 12345 in kill_calls
        assert not d.exists()

    def test_corrupt_owner_pid_falls_back_to_legacy(self, fake_tmpdir):
@ -253,15 +253,15 @@ class TestOwnerPidCrossProcess:

        kill_calls = []

-        def mock_kill(pid, sig):
-            kill_calls.append((pid, sig))
+        def mock_terminate(pid):
+            kill_calls.append(pid)

        with patch("gateway.status._pid_exists", return_value=True), \
-             patch("os.kill", side_effect=mock_kill):
+             patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
            _reap_orphaned_browser_sessions()

        # Legacy path took over → tracked → not reaped
-        assert (12345, signal.SIGTERM) not in kill_calls
+        assert 12345 not in kill_calls
        assert d.exists()

    def test_owner_pid_permission_error_treated_as_alive(self, fake_tmpdir):
@ -280,16 +280,16 @@ class TestOwnerPidCrossProcess:

        kill_calls = []

-        def mock_kill(pid, sig):
-            kill_calls.append((pid, sig))
+        def mock_terminate(pid):
+            kill_calls.append(pid)

        # Owner 22222 reported alive (PermissionError collapses to True
-        # inside _pid_exists). Daemon never probed, never SIGTERMed.
+        # inside _pid_exists). Daemon never probed, never terminated.
        with patch("gateway.status._pid_exists", return_value=True), \
-             patch("os.kill", side_effect=mock_kill):
+             patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
            _reap_orphaned_browser_sessions()

-        assert (12345, signal.SIGTERM) not in kill_calls
+        assert 12345 not in kill_calls
        assert d.exists()

    def test_write_owner_pid_creates_file_with_current_pid(
--- a/tests/tools/test_browser_secret_exfil.py
+++ b/tests/tools/test_browser_secret_exfil.py
@ -31,7 +31,13 @@ class TestBrowserSecretExfil:
    def test_allows_normal_url(self):
        """Normal URLs pass the secret check (may fail for other reasons)."""
        from tools.browser_tool import browser_navigate
-        result = browser_navigate("https://github.com/NousResearch/hermes-agent")
+        # Patch the actual browser command — we only care that the secret
+        # check doesn't block a clean URL, not that Chrome starts in CI.
+        mock_result = {"success": True, "data": {"title": "ok", "url": "https://github.com/NousResearch/hermes-agent"}}
+        with patch("tools.browser_tool._run_browser_command", return_value=mock_result), \
+             patch("tools.browser_tool._get_session_info", return_value={"_first_nav": False}), \
+             patch("tools.browser_tool._is_local_backend", return_value=True):
+            result = browser_navigate("https://github.com/NousResearch/hermes-agent")
        parsed = json.loads(result)
        # Should NOT be blocked by secret detection
        assert "API key or token" not in parsed.get("error", "")
--- a/tests/tools/test_computer_use.py
+++ b/tests/tools/test_computer_use.py
@ -76,6 +76,27 @@ class TestSchema:
        modes = set(COMPUTER_USE_SCHEMA["parameters"]["properties"]["mode"]["enum"])
        assert modes == {"som", "vision", "ax"}

+    def test_schema_exposes_max_elements_cap_for_capture(self):
+        from tools.computer_use.schema import COMPUTER_USE_SCHEMA
+        props = COMPUTER_USE_SCHEMA["parameters"]["properties"]
+        assert "max_elements" in props
+        assert props["max_elements"]["type"] == "integer"
+        assert props["max_elements"].get("minimum", 1) >= 1
+
+    def test_schema_max_elements_documents_default_and_upper_bound(self):
+        """Schema description must agree with the runtime. The original PR
+        text said "Default 100" without a corresponding `default` field, and
+        had no upper bound — both Copilot findings.
+        """
+        from tools.computer_use.schema import COMPUTER_USE_SCHEMA
+        from tools.computer_use.tool import (
+            _DEFAULT_MAX_ELEMENTS,
+            _MAX_ALLOWED_MAX_ELEMENTS,
+        )
+        prop = COMPUTER_USE_SCHEMA["parameters"]["properties"]["max_elements"]
+        assert prop.get("default") == _DEFAULT_MAX_ELEMENTS
+        assert prop.get("maximum") == _MAX_ALLOWED_MAX_ELEMENTS
+

 class TestRegistration:
    def test_tool_registers_with_registry(self):
@ -205,6 +226,54 @@ class TestDispatch:
        parsed = json.loads(out)
        assert "error" in parsed

+    def test_set_value_routes_to_backend(self, noop_backend):
+        """set_value must reach the backend — regression for missing _NoopBackend stub."""
+        from tools.computer_use.tool import handle_computer_use
+        out = handle_computer_use({"action": "set_value", "value": "Option A", "element": 5})
+        parsed = json.loads(out)
+        assert parsed.get("ok") is True
+        assert parsed.get("action") == "set_value"
+        assert any(c[0] == "set_value" for c in noop_backend.calls)
+
+    def test_set_value_missing_value_returns_error(self, noop_backend):
+        from tools.computer_use.tool import handle_computer_use
+        out = handle_computer_use({"action": "set_value"})
+        parsed = json.loads(out)
+        assert "error" in parsed
+    def test_capture_after_skipped_when_action_failed(self, noop_backend):
+        """capture_after must not fire when res.ok=False (regression guard).
+
+        A follow-up screenshot after a failed action shows the screen in a
+        normal state, misleading the model into thinking the action succeeded.
+        """
+        from unittest.mock import patch
+        from tools.computer_use.backend import ActionResult
+        from tools.computer_use.tool import handle_computer_use
+
+        # Make click() return a failure.
+        with patch.object(noop_backend, "click",
+                          return_value=ActionResult(ok=False, action="click",
+                                                    message="element not found")):
+            out = handle_computer_use({"action": "click", "element": 99,
+                                       "capture_after": True})
+
+        parsed = json.loads(out)
+        # Should return the error, not a multimodal capture.
+        assert parsed.get("ok") is False
+        assert parsed.get("action") == "click"
+        # No follow-up capture should have been issued.
+        capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
+        assert len(capture_calls) == 0, "capture must not be called after a failed action"
+
+    def test_capture_after_fires_when_action_succeeds(self, noop_backend):
+        """capture_after must trigger for successful actions."""
+        from tools.computer_use.tool import handle_computer_use
+        out = handle_computer_use({"action": "click", "element": 1,
+                                   "capture_after": True})
+        # Noop backend returns ok=True, so capture should have been called.
+        capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
+        assert len(capture_calls) == 1
+

 # ---------------------------------------------------------------------------
 # Safety guards (type / key block lists)
@ -337,6 +406,193 @@ class TestCaptureResponse:
        assert "AXButton" in text_part["text"]
        assert "AXTextField" in text_part["text"]

+    def _ax_backend_with(self, count: int):
+        """Construct a fake backend that yields ``count`` AX elements."""
+        from tools.computer_use.backend import CaptureResult, UIElement
+
+        elements = [
+            UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
+            for i in range(count)
+        ]
+
+        class FakeBackend:
+            def start(self): pass
+            def stop(self): pass
+            def is_available(self): return True
+            def capture(self, mode="som", app=None):
+                return CaptureResult(
+                    mode=mode, width=800, height=600,
+                    png_b64="",
+                    elements=list(elements),
+                    app="Obsidian",
+                )
+            def click(self, **kw): ...
+            def drag(self, **kw): ...
+            def scroll(self, **kw): ...
+            def type_text(self, text): ...
+            def key(self, keys): ...
+            def list_apps(self): return []
+            def focus_app(self, app, raise_window=False): ...
+
+        return FakeBackend()
+
+    def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
+        """Regression for #22865: an Electron-style 600-element AX tree must
+        not emit the entire array verbatim into the tool result.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+            out = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})
+
+        parsed = json.loads(out)
+        assert parsed["mode"] == "ax"
+        assert parsed["total_elements"] == 600
+        assert len(parsed["elements"]) == cu_tool._DEFAULT_MAX_ELEMENTS
+        assert parsed["truncated_elements"] == 600 - cu_tool._DEFAULT_MAX_ELEMENTS
+        # Truncation must be visible in the human summary so the model knows
+        # the JSON view is partial and can re-issue with a tighter scope.
+        assert "truncated to" in parsed["summary"]
+
+    def test_capture_ax_honors_explicit_max_elements_override(self):
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+            out = cu_tool.handle_computer_use(
+                {"action": "capture", "mode": "ax", "max_elements": 250}
+            )
+
+        parsed = json.loads(out)
+        assert len(parsed["elements"]) == 250
+        assert parsed["truncated_elements"] == 350
+
+    def test_capture_ax_below_cap_is_unchanged(self):
+        """Backwards-compat: small captures keep the full elements array and
+        do not surface a `truncated_elements` field.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(5)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+            out = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})
+
+        parsed = json.loads(out)
+        assert len(parsed["elements"]) == 5
+        assert parsed["total_elements"] == 5
+        assert "truncated_elements" not in parsed
+        assert "truncated to" not in parsed["summary"]
+
+    def test_capture_ax_invalid_max_elements_falls_back_to_default(self):
+        """Malformed `max_elements` (string, negative, zero) must not silently
+        disable the cap and re-introduce the original unbounded behavior.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        for bad in ("not-a-number", 0, -10):
+            with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+                out = cu_tool.handle_computer_use(
+                    {"action": "capture", "mode": "ax", "max_elements": bad}
+                )
+            parsed = json.loads(out)
+            assert len(parsed["elements"]) == cu_tool._DEFAULT_MAX_ELEMENTS, (
+                f"bad max_elements={bad!r} disabled the cap"
+            )
+
+    def test_capture_ax_clamps_oversized_max_elements_to_hard_cap(self):
+        """A caller passing a very large `max_elements` must not be able to
+        disable the safeguard. The cap is clamped to a hard upper bound so
+        the context-blow-up protection cannot be bypassed by argument.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(5000)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+            out = cu_tool.handle_computer_use(
+                {"action": "capture", "mode": "ax", "max_elements": 10_000}
+            )
+        parsed = json.loads(out)
+        assert len(parsed["elements"]) == cu_tool._MAX_ALLOWED_MAX_ELEMENTS
+        assert parsed["total_elements"] == 5000
+        assert parsed["truncated_elements"] == 5000 - cu_tool._MAX_ALLOWED_MAX_ELEMENTS
+
+    def test_capture_ax_summary_indices_match_returned_elements(self):
+        """When `max_elements` is below the human-summary's own line cap, the
+        summary must not index elements that aren't in the returned array.
+        Otherwise the model sees `#15` in the summary and finds no matching
+        entry in `elements`.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+            out = cu_tool.handle_computer_use(
+                {"action": "capture", "mode": "ax", "max_elements": 5}
+            )
+        parsed = json.loads(out)
+        returned_indices = {e["index"] for e in parsed["elements"]}
+        summary_lines = parsed["summary"].splitlines()
+        indexed_lines = [ln for ln in summary_lines if ln.lstrip().startswith("#")]
+        for ln in indexed_lines:
+            idx_token = ln.lstrip().split()[0].lstrip("#")
+            idx = int(idx_token)
+            assert idx in returned_indices, (
+                f"summary references #{idx} but it is absent from elements payload "
+                f"(returned: {sorted(returned_indices)})"
+            )
+
+    def test_capture_multimodal_summary_omits_truncation_note(self):
+        """The som/vision multimodal envelope returns a screenshot, not an
+        `elements` array — so a "response truncated to N of M elements"
+        claim in the summary would be inaccurate.
+        """
+        from tools.computer_use.backend import CaptureResult, UIElement
+        from tools.computer_use import tool as cu_tool
+
+        fake_png = "iVBORw0KGgo="
+        elements = [
+            UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
+            for i in range(600)
+        ]
+
+        class FakeBackend:
+            def start(self): pass
+            def stop(self): pass
+            def is_available(self): return True
+            def capture(self, mode="som", app=None):
+                return CaptureResult(
+                    mode=mode, width=800, height=600,
+                    png_b64=fake_png, elements=list(elements),
+                    app="Obsidian",
+                )
+            def click(self, **kw): ...
+            def drag(self, **kw): ...
+            def scroll(self, **kw): ...
+            def type_text(self, text): ...
+            def key(self, keys): ...
+            def list_apps(self): return []
+            def focus_app(self, app, raise_window=False): ...
+
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
+            out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
+
+        assert isinstance(out, dict) and out["_multimodal"] is True
+        text_part = next(p for p in out["content"] if p.get("type") == "text")
+        assert "truncated to" not in text_part["text"], (
+            "multimodal response carries an image, not an elements array; "
+            "the truncation note describes a payload field that isn't present"
+        )
+        assert "truncated to" not in out["text_summary"]
+

 # ---------------------------------------------------------------------------
 # Anthropic adapter: multimodal tool-result conversion
--- a/tests/tools/test_cross_profile_guard.py
+++ b/tests/tools/test_cross_profile_guard.py
@ -0,0 +1,259 @@
+"""Tests for the cross-profile soft guard wired into write_file / patch /
+skill_manage.
+
+The classifier is tested in tests/agent/test_file_safety_cross_profile.py.
+This file tests that the tool surfaces:
+
+  1. Refuse cross-profile writes by default and return the warning.
+  2. Accept cross-profile writes when cross_profile=True is passed.
+  3. Continue to accept in-profile writes normally.
+  4. skill_manage's "not found" error names other profiles where the
+     skill exists.
+"""
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def fake_hermes(tmp_path, monkeypatch):
+    """Build a two-profile Hermes layout and point HERMES_HOME at
+    the hermes-security profile (matching the original-incident shape).
+    """
+    root = tmp_path / "fake-hermes"
+    (root / "skills" / "shared-skill").mkdir(parents=True)
+    (root / "skills" / "shared-skill" / "SKILL.md").write_text(
+        "---\nname: shared-skill\ndescription: default copy.\n---\n"
+    )
+
+    sec_home = root / "profiles" / "hermes-security"
+    (sec_home / "skills").mkdir(parents=True)
+
+    coder_home = root / "profiles" / "coder"
+    (coder_home / "skills").mkdir(parents=True)
+
+    monkeypatch.setenv("HERMES_HOME", str(sec_home))
+
+    import hermes_constants
+    monkeypatch.setattr(hermes_constants, "get_default_hermes_root", lambda: root)
+
+    import agent.file_safety as fs
+    monkeypatch.setattr(fs, "_hermes_home_path", lambda: sec_home)
+    monkeypatch.setattr(fs, "_hermes_root_path", lambda: root)
+
+    return {
+        "root": root,
+        "sec_home": sec_home,
+        "coder_home": coder_home,
+    }
+
+
+# ---------------------------------------------------------------------------
+# write_file
+# ---------------------------------------------------------------------------
+
+
+class TestWriteFileCrossProfileGuard:
+    def test_in_profile_write_allowed(self, fake_hermes):
+        from tools.file_tools import write_file_tool
+        target = fake_hermes["sec_home"] / "skills" / "new-skill" / "SKILL.md"
+        target.parent.mkdir(parents=True)
+        result_json = write_file_tool(str(target), "in-profile content")
+        result = json.loads(result_json)
+        assert not result.get("error"), f"In-profile write should succeed: {result}"
+        assert target.exists()
+        assert target.read_text() == "in-profile content"
+
+    def test_cross_profile_write_blocked_by_default(self, fake_hermes):
+        """The May 2026 incident — security-profile session edits default
+        profile's skill. Must be blocked."""
+        from tools.file_tools import write_file_tool
+        target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
+        original = target.read_text()
+        result_json = write_file_tool(str(target), "OVERWRITTEN")
+        result = json.loads(result_json)
+        assert result.get("error"), "Cross-profile write should be refused"
+        assert "cross-profile" in result["error"].lower()
+        assert "default" in result["error"]
+        assert "hermes-security" in result["error"]
+        # File untouched.
+        assert target.read_text() == original
+
+    def test_cross_profile_True_bypass(self, fake_hermes):
+        """Explicit override after user direction must succeed."""
+        from tools.file_tools import write_file_tool
+        target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
+        result_json = write_file_tool(
+            str(target), "user-directed override", cross_profile=True
+        )
+        result = json.loads(result_json)
+        assert not result.get("error"), f"cross_profile=True must succeed: {result}"
+        assert target.read_text() == "user-directed override"
+
+    def test_non_hermes_path_unaffected(self, fake_hermes, tmp_path):
+        from tools.file_tools import write_file_tool
+        target = tmp_path / "outside" / "main.py"
+        target.parent.mkdir()
+        result_json = write_file_tool(str(target), "print('hello')")
+        result = json.loads(result_json)
+        assert not result.get("error")
+        assert target.exists()
+
+
+# ---------------------------------------------------------------------------
+# patch
+# ---------------------------------------------------------------------------
+
+
+class TestPatchCrossProfileGuard:
+    def test_cross_profile_patch_blocked(self, fake_hermes):
+        from tools.file_tools import patch_tool
+        target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
+        original = target.read_text()
+        result_json = patch_tool(
+            mode="replace",
+            path=str(target),
+            old_string="default copy.",
+            new_string="HIJACKED.",
+        )
+        result = json.loads(result_json)
+        assert result.get("error")
+        assert "cross-profile" in result["error"].lower()
+        assert target.read_text() == original
+
+    def test_cross_profile_patch_bypass(self, fake_hermes):
+        from tools.file_tools import patch_tool
+        target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
+        result_json = patch_tool(
+            mode="replace",
+            path=str(target),
+            old_string="default copy.",
+            new_string="user-directed update.",
+            cross_profile=True,
+        )
+        result = json.loads(result_json)
+        assert not result.get("error"), f"cross_profile=True bypass: {result}"
+        assert "user-directed update." in target.read_text()
+
+    def test_v4a_patch_extracts_path_for_guard(self, fake_hermes):
+        """V4A patches embed the target paths in the patch body, not in
+        a ``path`` kwarg. The guard must still apply."""
+        from tools.file_tools import patch_tool
+        target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
+        original = target.read_text()
+        v4a = (
+            "*** Begin Patch\n"
+            f"*** Update File: {target}\n"
+            "@@\n"
+            "-default copy.\n"
+            "+HIJACKED.\n"
+            "*** End Patch"
+        )
+        result_json = patch_tool(mode="patch", patch=v4a)
+        result = json.loads(result_json)
+        assert result.get("error"), f"V4A cross-profile must block: {result}"
+        assert "cross-profile" in result["error"].lower()
+        assert target.read_text() == original
+
+
+# ---------------------------------------------------------------------------
+# skill_manage — error message naming other profile (item D)
+# ---------------------------------------------------------------------------
+
+
+class TestSkillManageCrossProfileErrorUX:
+    def _make_skill_in_profile(self, profile_dir: Path, name: str):
+        d = profile_dir / "skills" / name
+        d.mkdir(parents=True, exist_ok=True)
+        (d / "SKILL.md").write_text(
+            f"---\nname: {name}\ndescription: a skill.\n---\n"
+        )
+
+    def test_error_names_other_profile_when_skill_lives_there(
+        self, fake_hermes, monkeypatch
+    ):
+        """The original incident shape — model expects 'foo' in active
+        profile, but 'foo' lives in default. Error must point at default."""
+        self._make_skill_in_profile(fake_hermes["root"], "default-only-skill")
+
+        # Re-import the module so SKILLS_DIR picks up HERMES_HOME (set in
+        # the fixture). Skill_manager_tool computes SKILLS_DIR at import.
+        import importlib
+        import tools.skill_manager_tool
+        importlib.reload(tools.skill_manager_tool)
+        from tools.skill_manager_tool import _skill_not_found_error
+
+        err = _skill_not_found_error("default-only-skill")
+        assert "not found in active profile 'hermes-security'" in err
+        assert "default" in err
+        assert "cross_profile=True" in err
+
+    def test_error_names_multiple_profiles(self, fake_hermes, monkeypatch):
+        """When the skill exists in TWO other profiles, both should be named."""
+        self._make_skill_in_profile(fake_hermes["root"], "everywhere-skill")
+        self._make_skill_in_profile(fake_hermes["coder_home"], "everywhere-skill")
+
+        import importlib
+        import tools.skill_manager_tool
+        importlib.reload(tools.skill_manager_tool)
+        from tools.skill_manager_tool import _skill_not_found_error
+
+        err = _skill_not_found_error("everywhere-skill")
+        assert "default" in err
+        assert "coder" in err
+        # Switch-profiles hint
+        assert "hermes -p" in err
+
+    def test_genuinely_missing_skill_keeps_helpful_hint(
+        self, fake_hermes, monkeypatch
+    ):
+        """When no profile has the skill, error falls back to skills_list hint."""
+        import importlib
+        import tools.skill_manager_tool
+        importlib.reload(tools.skill_manager_tool)
+        from tools.skill_manager_tool import _skill_not_found_error
+
+        err = _skill_not_found_error("totally-imaginary-skill")
+        assert "not found in active profile 'hermes-security'" in err
+        assert "skills_list" in err
+
+
+# ---------------------------------------------------------------------------
+# System prompt active-profile line (item B)
+# ---------------------------------------------------------------------------
+
+
+class TestSystemPromptActiveProfile:
+    def test_default_profile_line_in_prompt(self, tmp_path, monkeypatch):
+        """When active profile is 'default', the prompt names it and warns
+        about ~/.hermes/profiles/<name>/."""
+        # Don't set HERMES_HOME — falls back to default.
+        import agent.file_safety as fs
+        monkeypatch.setattr(fs, "_hermes_home_path", lambda: tmp_path / "fake")
+        monkeypatch.setattr(fs, "_hermes_root_path", lambda: tmp_path / "fake")
+
+        from agent.file_safety import _resolve_active_profile_name
+        assert _resolve_active_profile_name() == "default"
+        # Build the line manually to pin the contract — the prompt builder
+        # is too heavy to instantiate end-to-end in a unit test.
+        # See agent/system_prompt.py for the exact wording.
+
+    def test_named_profile_line_in_prompt_text(self, fake_hermes):
+        """When active profile is 'hermes-security', the prompt warns
+        explicitly about NOT modifying default's skills/plugins/cron/memories."""
+        # Spot-check by reading the source — the contract is:
+        # (1) names the active profile, (2) names the default-profile
+        # paths, (3) says "do not modify another profile's" without
+        # explicit user direction.
+        from pathlib import Path
+        src = Path("agent/system_prompt.py").read_text()
+        assert "Active Hermes profile" in src
+        assert "cross_profile=True" in src
+        assert "~/.hermes/profiles/" in src
+        # Both branches present (default and named profile).
+        assert "Active Hermes profile: default" in src
+        assert "Active Hermes profile: {active_profile}" in src
--- a/tests/tools/test_file_operations.py
+++ b/tests/tools/test_file_operations.py
@ -60,6 +60,113 @@ class TestIsWriteDenied:
    def test_tilde_expansion(self):
        assert _is_write_denied("~/.ssh/authorized_keys") is True

+    @pytest.mark.parametrize(
+        "path",
+        [
+            "auth.json",
+            "config.yaml",
+            "webhook_subscriptions.json",
+            "mcp-tokens/token1.json",
+            "mcp-tokens/subdir/token2.json",
+            "pairing/telegram-approved.json",
+            "pairing/discord-approved.json",
+            "pairing/telegram-pending.json",
+            "pairing",
+        ],
+    )
+    def test_hermes_control_files_and_mcp_tokens_denied(self, path):
+        """Hermes control files and mcp-tokens/pairing entries must be write-denied."""
+        from hermes_constants import get_hermes_home
+        hermes_home = get_hermes_home()
+        full_path = str(hermes_home / path)
+        assert _is_write_denied(full_path) is True
+
+    @pytest.mark.parametrize(
+        "path",
+        [
+            "dummy/../config.yaml",
+            "./auth.json",
+            "mcp-tokens/../config.yaml",
+        ],
+    )
+    def test_hermes_control_files_traversal_denied(self, path):
+        """Path traversal attempts to control files must be blocked by realpath."""
+        from hermes_constants import get_hermes_home
+        hermes_home = get_hermes_home()
+        full_path = str(hermes_home / path)
+        assert _is_write_denied(full_path) is True
+
+    @pytest.mark.parametrize(
+        "path",
+        [
+            "/tmp/standard_file.txt",
+            "~/projects/myapp/main.py",
+            "/var/log/app.log",
+        ],
+    )
+    def test_standard_paths_allowed(self, path):
+        """Unrelated paths must still be allowed."""
+        assert _is_write_denied(path) is False
+
+    @pytest.mark.parametrize(
+        "name",
+        ["auth.json", "config.yaml", "webhook_subscriptions.json"],
+    )
+    def test_control_files_protected_in_profile_mode(self, tmp_path, monkeypatch, name):
+        """Under a profile, BOTH <profile>/X and <root>/X must be denied (#15981 shape).
+
+        Without the root-level pass, a profile-mode session leaves the
+        global ~/.hermes/{auth.json,config.yaml,webhook_subscriptions.json}
+        writable — the same gap PR #15981 fixed for .env.
+        """
+        # Simulate a profile-mode HERMES_HOME layout:
+        #   <root>/profiles/coder/{auth.json,config.yaml,...}
+        #   <root>/{auth.json,config.yaml,...}        ← must also be denied
+        root = tmp_path / "hermes"
+        profile = root / "profiles" / "coder"
+        profile.mkdir(parents=True)
+        monkeypatch.setenv("HERMES_HOME", str(profile))
+
+        # Profile copy
+        assert _is_write_denied(str(profile / name)) is True
+        # Root copy — the gap this widening closes
+        assert _is_write_denied(str(root / name)) is True
+
+    def test_mcp_tokens_dir_protected_in_profile_mode(self, tmp_path, monkeypatch):
+        """mcp-tokens/ under profile AND under root must both be denied."""
+        root = tmp_path / "hermes"
+        profile = root / "profiles" / "coder"
+        profile.mkdir(parents=True)
+        monkeypatch.setenv("HERMES_HOME", str(profile))
+
+        assert _is_write_denied(str(profile / "mcp-tokens" / "tok.json")) is True
+        assert _is_write_denied(str(root / "mcp-tokens" / "tok.json")) is True
+        # The directory itself must also be denied (not just files inside)
+        assert _is_write_denied(str(root / "mcp-tokens")) is True
+
+    def test_pairing_dir_denied(self, tmp_path, monkeypatch):
+        """Regression: pairing/ must be write-denied under both profile and root.
+
+        PR #30383 introduced ~/.hermes/pairing/{platform}-approved.json as the
+        gateway access-control list. Without this block, a prompt-injected agent
+        can write arbitrary user IDs into an approved file, granting persistent
+        gateway access without going through the pairing code flow — the same
+        threat class that motivated protecting webhook_subscriptions.json.
+        """
+        root = tmp_path / "hermes"
+        profile = root / "profiles" / "coder"
+        profile.mkdir(parents=True)
+        monkeypatch.setenv("HERMES_HOME", str(profile))
+
+        # Active profile pairing entries
+        assert _is_write_denied(str(profile / "pairing" / "telegram-approved.json")) is True
+        assert _is_write_denied(str(profile / "pairing" / "discord-pending.json")) is True
+        # The directory itself
+        assert _is_write_denied(str(profile / "pairing")) is True
+        # Root pairing entries (profile mode — same shape as mcp-tokens gap)
+        assert _is_write_denied(str(root / "pairing" / "telegram-approved.json")) is True
+        assert _is_write_denied(str(root / "pairing")) is True
+


 # =========================================================================
--- a/tests/tools/test_local_interrupt_cleanup.py
+++ b/tests/tools/test_local_interrupt_cleanup.py
@ -48,8 +48,14 @@ def _process_group_snapshot(pgid: int) -> str:
    ).stdout.strip()


-def _wait_for_pgid_exit(pgid: int, timeout: float = 10.0) -> bool:
-    """Wait for a process group to disappear under loaded xdist hosts."""
+def _wait_for_pgid_exit(pgid: int, timeout: float = 30.0) -> bool:
+    """Wait for a process group to disappear under loaded xdist hosts.
+
+    The cleanup chain is: SIGTERM → 3s TimeoutStopSec → SIGKILL → reap.
+    Under heavy xdist load (40 parallel workers, 6-shard CI), the full
+    sequence can exceed 10s. Default timeout is generous to avoid CI
+    flakes; in practice the wait returns in <1s on quiet hosts.
+    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if not _pgid_still_alive(pgid):
@ -166,9 +172,11 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
        assert ret == 1, f"SetAsyncExc returned {ret}, expected 1"

        # Give the worker a moment to: hit the exception at the next poll,
-        # run the except-block cleanup (_kill_process), and exit.
-        t.join(timeout=5.0)
-        assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
+        # run the except-block cleanup (_kill_process), and exit.  Under
+        # xdist load the SIGTERM → 3s wait → SIGKILL chain can take longer
+        # than 5s before the worker's join() returns; bumped to 15s.
+        t.join(timeout=15.0)
+        assert not t.is_alive(), "worker didn't exit within 15 s of the interrupt"

        # The critical assertion: the subprocess GROUP must be dead.  Not
        # just the bash wrapper — the 'sleep 30' child too. Under xdist load,
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@ -1462,6 +1462,27 @@ class TestHTTPConfig:

        asyncio.run(_test())

+    def test_stdio_unavailable_raises_importerror_not_nameerror(self):
+        """Regression test for #30904.
+
+        When the mcp SDK isn't installed, ``_run_stdio`` previously leaked a
+        bare ``NameError: name 'StdioServerParameters' is not defined``. The
+        gate now raises a clear ``ImportError`` with install instructions,
+        mirroring ``_run_http``'s behaviour when the HTTP transport is
+        unavailable.
+        """
+        from tools.mcp_tool import MCPServerTask
+
+        server = MCPServerTask("local")
+        config = {"command": "python3", "args": ["/tmp/echo.py"]}
+
+        async def _test():
+            with patch("tools.mcp_tool._MCP_AVAILABLE", False):
+                with pytest.raises(ImportError, match=r"mcp.*SDK"):
+                    await server._run_stdio(config)
+
+        asyncio.run(_test())
+
    def test_http_seeds_initial_protocol_header(self):
        from tools.mcp_tool import LATEST_PROTOCOL_VERSION, MCPServerTask

--- a/tests/tools/test_memory_tool.py
+++ b/tests/tools/test_memory_tool.py
@ -255,3 +255,128 @@ class TestMemoryToolDispatcher:
    def test_remove_requires_old_text(self, store):
        result = json.loads(memory_tool(action="remove", store=store))
        assert result["success"] is False
+
+
+# =========================================================================
+# External drift guard (#26045)
+#
+# An external writer — patch tool, shell append, manual edit, or sister
+# session — can grow MEMORY.md beyond the tool's mental model: no §
+# delimiters, content that would all collapse into a single "entry" larger
+# than the char limit. Pre-fix, the next memory(action=replace) from a
+# session with stale in-memory state truncated that giant entry, silently
+# discarding the appended bytes. Reproduced in production on 2026-05-14 —
+# ~8KB of structured vendor / standing-orders / pinboard content destroyed
+# by a sister session's replace.
+# =========================================================================
+
+
+class TestExternalDriftGuard:
+    """Mutations must refuse to flush when on-disk content shows external drift."""
+
+    def _plant_drift(self, store, target="memory"):
+        """Append free-form content (no § delimiters) past char_limit."""
+        path = store._path_for(target)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        # 800 chars per entry × 3 sections == ~2.4KB without delimiters,
+        # well over the test fixture's 500-char limit.
+        block = "\n\n## Vendor Master\n" + "x" * 800
+        block += "\n\n## Standing Orders\n" + "y" * 800
+        block += "\n\n## Pin Board\n" + "z" * 800
+        existing = path.read_text(encoding="utf-8") if path.exists() else ""
+        path.write_text(existing + block, encoding="utf-8")
+        return path
+
+    def test_replace_refuses_on_drift(self, store):
+        store.add("memory", "User likes brevity.")
+        path = self._plant_drift(store)
+        original_size = path.stat().st_size
+
+        result = store.replace("memory", "User likes", "User prefers concise.")
+
+        assert result["success"] is False
+        assert "drift_backup" in result
+        # On-disk file is UNTOUCHED — that's the point.
+        assert path.stat().st_size == original_size
+        assert "Vendor Master" in path.read_text()
+        # Backup exists with the drifted content.
+        bak = result["drift_backup"]
+        assert Path(bak).exists()
+        assert "Vendor Master" in Path(bak).read_text()
+
+    def test_add_refuses_on_drift(self, store):
+        store.add("memory", "Existing.")
+        path = self._plant_drift(store)
+        original = path.read_text()
+
+        result = store.add("memory", "New entry under drift.")
+
+        assert result["success"] is False
+        assert "drift_backup" in result
+        assert path.read_text() == original  # untouched
+
+    def test_remove_refuses_on_drift(self, store):
+        store.add("memory", "Target entry to remove.")
+        path = self._plant_drift(store)
+        original = path.read_text()
+
+        result = store.remove("memory", "Target entry")
+
+        assert result["success"] is False
+        assert "drift_backup" in result
+        assert path.read_text() == original  # untouched
+
+    def test_clean_file_does_not_trigger_drift(self, store):
+        """A normally-written file (just below char_limit, §-delimited) is fine."""
+        # Two tool-shaped entries totaling under the 500-char limit.
+        store.add("memory", "Entry one — normal length.")
+        store.add("memory", "Entry two — also normal.")
+
+        result = store.add("memory", "Entry three.")
+        assert result["success"] is True
+        assert "drift_backup" not in result
+
+        result = store.replace("memory", "Entry two", "Entry two replaced.")
+        assert result["success"] is True
+
+    def test_error_message_points_at_remediation(self, store):
+        """The error string must reference the backup AND remediation steps."""
+        store.add("memory", "Initial.")
+        self._plant_drift(store)
+
+        result = store.replace("memory", "Initial", "Replacement.")
+        assert result["success"] is False
+        # The model has to know what file to look at and what to do.
+        assert ".bak." in result["error"]
+        assert "remediation" in result
+        assert "26045" in result["error"]  # tracking-issue back-reference
+
+    def test_drift_guard_also_protects_user_target(self, store):
+        """USER.md gets the same guarantee as MEMORY.md."""
+        store.add("user", "Some preference.")
+        path = self._plant_drift(store, target="user")
+        original_size = path.stat().st_size
+
+        result = store.replace("user", "Some preference", "New preference.")
+        assert result["success"] is False
+        assert path.stat().st_size == original_size
+
+    def test_drift_backup_filename_is_unique_per_invocation(self, store):
+        """Two drift refusals close together must not collide on bak.<ts>.
+
+        If two refusals share the same epoch second, the second call would
+        overwrite the first .bak. The current implementation accepts that
+        — both files describe the same on-disk state — but pin the path
+        format here so any future change has to think about it.
+        """
+        store.add("memory", "Initial.")
+        self._plant_drift(store)
+
+        r1 = store.replace("memory", "Initial", "Replacement.")
+        r2 = store.add("memory", "Another.")
+        assert r1.get("drift_backup")
+        assert r2.get("drift_backup")
+        # Same epoch second is the expected collision case — both point
+        # at the same snapshot. Different second is also fine.
+        assert ".bak." in r1["drift_backup"]
+        assert ".bak." in r2["drift_backup"]
--- a/tests/tools/test_notify_on_complete.py
+++ b/tests/tools/test_notify_on_complete.py
@ -348,3 +348,158 @@ class TestCompletionConsumed:
        result = registry.poll("proc_running")
        assert result["status"] == "running"
        assert not registry.is_completion_consumed("proc_running")
+
+
+# ---------------------------------------------------------------------------
+# Silent-background-process hint
+#
+# background=True without notify_on_complete=True OR watch_patterns runs
+# the process silently — the agent has no way to learn it finished short
+# of calling process(action="poll") explicitly. The tool result must
+# include a "hint" field that nudges the agent toward
+# notify_on_complete=True for bounded tasks. May 2026 PR #31231 incident:
+# bg CI poller exited green, agent never noticed, user had to surface it.
+# ---------------------------------------------------------------------------
+
+
+def _silent_bg_base_config(tmp_path):
+    return {
+        "env_type": "local",
+        "docker_image": "",
+        "singularity_image": "",
+        "modal_image": "",
+        "daytona_image": "",
+        "cwd": str(tmp_path),
+        "timeout": 30,
+    }
+
+
+def _silent_bg_harness(monkeypatch, tmp_path):
+    """Common test fixture: patch enough of terminal_tool to spawn a fake
+    background process and capture the JSON result the agent sees."""
+    import tools.terminal_tool as terminal_tool_module
+    from tools import process_registry as process_registry_module
+    from types import SimpleNamespace
+
+    config = _silent_bg_base_config(tmp_path)
+    dummy_env = SimpleNamespace(env={})
+
+    def fake_spawn_local(**kwargs):
+        return SimpleNamespace(
+            id="proc_silent_test",
+            pid=4242,
+            notify_on_complete=False,
+            watcher_platform="",
+            watcher_chat_id="",
+            watcher_user_id="",
+            watcher_user_name="",
+            watcher_thread_id="",
+            watcher_message_id="",
+            watcher_interval=0,
+        )
+
+    monkeypatch.setattr(terminal_tool_module, "_get_env_config", lambda: config)
+    monkeypatch.setattr(terminal_tool_module, "_start_cleanup_thread", lambda: None)
+    monkeypatch.setattr(terminal_tool_module, "_check_all_guards", lambda *_args, **_kwargs: {"approved": True})
+    monkeypatch.setattr(process_registry_module.process_registry, "spawn_local", fake_spawn_local)
+    monkeypatch.setitem(terminal_tool_module._active_environments, "default", dummy_env)
+    monkeypatch.setitem(terminal_tool_module._last_activity, "default", 0.0)
+    return terminal_tool_module
+
+
+def test_background_without_notify_emits_silent_process_hint(monkeypatch, tmp_path):
+    """The footgun case (May 2026 PR #31231): bg=True alone runs silently
+    and the agent has no signal it finished. Tool must nudge."""
+    tt = _silent_bg_harness(monkeypatch, tmp_path)
+    try:
+        result = json.loads(
+            tt.terminal_tool(
+                command="while true; do gh pr checks 999; sleep 30; done",
+                background=True,
+            )
+        )
+    finally:
+        tt._active_environments.pop("default", None)
+        tt._last_activity.pop("default", None)
+
+    assert result["session_id"] == "proc_silent_test"
+    hint = result.get("hint", "")
+    assert hint, "Silent background process must include a hint field"
+    assert "notify_on_complete" in hint, (
+        "Hint must name the corrective flag so the agent can self-correct"
+    )
+    assert "silent" in hint.lower() or "no way to learn" in hint.lower(), (
+        "Hint must explain the failure mode, not just suggest the fix"
+    )
+
+
+def test_background_with_notify_does_not_emit_hint(monkeypatch, tmp_path):
+    """The correct shape — bg+notify together — must not nag."""
+    tt = _silent_bg_harness(monkeypatch, tmp_path)
+    try:
+        result = json.loads(
+            tt.terminal_tool(
+                command="pytest tests/",
+                background=True,
+                notify_on_complete=True,
+            )
+        )
+    finally:
+        tt._active_environments.pop("default", None)
+        tt._last_activity.pop("default", None)
+
+    assert "hint" not in result, (
+        f"Correct usage must not emit a hint, got: {result.get('hint')!r}"
+    )
+    assert result.get("notify_on_complete") is True
+
+
+def test_background_with_watch_patterns_does_not_emit_hint(monkeypatch, tmp_path):
+    """watch_patterns is the other legitimate non-silent shape — also no hint."""
+    tt = _silent_bg_harness(monkeypatch, tmp_path)
+    try:
+        result = json.loads(
+            tt.terminal_tool(
+                command="uvicorn app:server --port 8080",
+                background=True,
+                watch_patterns=["Application startup complete"],
+            )
+        )
+    finally:
+        tt._active_environments.pop("default", None)
+        tt._last_activity.pop("default", None)
+
+    assert "hint" not in result, (
+        f"watch_patterns shape must not emit a silent-process hint, got: {result.get('hint')!r}"
+    )
+
+
+def test_foreground_command_does_not_emit_hint(monkeypatch, tmp_path):
+    """Hint only applies to background processes — foreground returns its
+    result synchronously and the agent always sees the outcome."""
+    tt = _silent_bg_harness(monkeypatch, tmp_path)
+
+    # Foreground path doesn't go through spawn_local. Patch the local-env
+    # exec method to short-circuit to a clean exit so the test doesn't
+    # actually shell out.
+    from types import SimpleNamespace
+    dummy_env = SimpleNamespace(
+        env={},
+        execute=lambda *a, **kw: {"output": "done", "exit_code": 0, "error": None},
+    )
+    monkeypatch.setitem(tt._active_environments, "default", dummy_env)
+
+    try:
+        result = json.loads(
+            tt.terminal_tool(
+                command="echo hello",
+                background=False,
+            )
+        )
+    finally:
+        tt._active_environments.pop("default", None)
+        tt._last_activity.pop("default", None)
+
+    assert "hint" not in result, (
+        f"Foreground commands must not emit the background-silence hint, got: {result.get('hint')!r}"
+    )
--- a/tests/tools/test_pr_6656_regressions.py
+++ b/tests/tools/test_pr_6656_regressions.py
@ -0,0 +1,287 @@
+"""Regression tests for PR #6656 — skill uninstall + bundle hash + pairing lock.
+
+Three independent fixes that were salvaged together:
+
+1. ``uninstall_skill`` path traversal: ``install_path`` comes from a JSON
+   file on disk; a malicious skill could write ``install_path: "../../"``
+   and trigger ``shutil.rmtree`` against parent directories. Guarded with
+   ``Path.resolve().is_relative_to(SKILLS_DIR.resolve())``.
+
+2. ``bundle_content_hash`` / ``content_hash`` filename inclusion: the
+   previous hash mixed only file CONTENTS, so swapping ``SKILL.md`` and
+   ``scripts/run.sh`` contents between two paths produced the same digest.
+   Now both functions prefix each entry with ``rel_path + \\x00`` and
+   stay symmetric (one on disk, one on in-memory bundle).
+
+3. ``PairingStore.list_pending`` TOCTOU: previously called
+   ``_cleanup_expired`` (which writes the JSON file) without holding
+   ``self._lock``, racing with ``generate_code`` / ``approve_code``.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from tools.skills_hub import (
+    SkillBundle,
+    bundle_content_hash,
+    uninstall_skill,
+)
+from tools.skills_guard import content_hash
+
+
+# =============================================================================
+# uninstall_skill: path traversal guard
+# =============================================================================
+
+
+class TestUninstallPathTraversal:
+    """The ``install_path`` field in ``lock.json`` is attacker-controllable
+    if a malicious skill is ever installed (or if the hub's lockfile is
+    corrupted). The uninstall path must refuse anything that resolves
+    outside ``SKILLS_DIR``.
+    """
+
+    @pytest.fixture
+    def hub_setup(self, tmp_path, monkeypatch):
+        """Build a hub directory tree with a malicious lock.json entry.
+
+        ``HubLockFile`` binds its default ``path`` argument at def time
+        against the module-level ``LOCK_FILE`` constant, so monkey-patching
+        ``LOCK_FILE`` alone is not enough — we also need to rebind the
+        function default. Patching ``HubLockFile.__init__.__defaults__``
+        is the standard tool for this.
+        """
+        import tools.skills_hub as hub
+        skills_dir = tmp_path / "skills"
+        hub_dir = skills_dir / ".hub"
+        hub_dir.mkdir(parents=True)
+        lock_path = hub_dir / "lock.json"
+
+        monkeypatch.setattr(hub, "SKILLS_DIR", skills_dir)
+        monkeypatch.setattr(hub, "HUB_DIR", hub_dir)
+        monkeypatch.setattr(hub, "LOCK_FILE", lock_path)
+        monkeypatch.setattr(hub, "AUDIT_LOG", hub_dir / "audit.log")
+        # Rebind HubLockFile.__init__'s default `path=` arg so
+        # `HubLockFile()` (no args) picks up the new lock path.
+        monkeypatch.setattr(
+            hub.HubLockFile.__init__,
+            "__defaults__",
+            (lock_path,),
+        )
+
+        # A real directory outside skills_dir that the traversal would
+        # delete if the guard fails.
+        victim = tmp_path / "do-not-delete"
+        victim.mkdir()
+        (victim / "important.txt").write_text("data")
+        return skills_dir, hub_dir, victim
+
+    def _write_lock(self, hub_dir: Path, entries: dict) -> None:
+        lock_path = hub_dir / "lock.json"
+        lock_path.write_text(json.dumps({"version": 1, "installed": entries}))
+
+    def test_traversal_via_parent_segments_rejected(self, hub_setup):
+        """install_path: "../do-not-delete" must NOT escape SKILLS_DIR."""
+        skills_dir, hub_dir, victim = hub_setup
+        self._write_lock(hub_dir, {
+            "evil": {
+                "install_path": "../do-not-delete",
+                "source": "https://example.com",
+                "version": "1.0",
+            },
+        })
+
+        ok, msg = uninstall_skill("evil")
+
+        assert ok is False
+        assert "outside" in msg or "resolves" in msg or "skills directory" in msg
+        # The victim directory MUST still exist.
+        assert victim.exists()
+        assert (victim / "important.txt").exists()
+
+    def test_absolute_path_rejected(self, hub_setup):
+        """install_path that's an absolute path outside SKILLS_DIR must be refused."""
+        skills_dir, hub_dir, victim = hub_setup
+        self._write_lock(hub_dir, {
+            "evil": {
+                "install_path": str(victim),
+                "source": "https://example.com",
+                "version": "1.0",
+            },
+        })
+
+        ok, msg = uninstall_skill("evil")
+
+        # SKILLS_DIR / "<absolute>" still results in an absolute path,
+        # which when resolved is outside skills_dir. Must be refused.
+        assert ok is False
+        assert victim.exists()
+
+    def test_symlink_escape_rejected(self, tmp_path, hub_setup):
+        """Symlinks inside SKILLS_DIR that point outside must be refused
+        after realpath resolution."""
+        skills_dir, hub_dir, victim = hub_setup
+        # Create a "skill" that's actually a symlink to victim
+        evil_link = skills_dir / "trapdoor"
+        evil_link.symlink_to(victim)
+
+        self._write_lock(hub_dir, {
+            "trap": {
+                "install_path": "trapdoor",
+                "source": "https://example.com",
+                "version": "1.0",
+            },
+        })
+
+        ok, msg = uninstall_skill("trap")
+
+        # realpath resolves the symlink → outside skills_dir → refused.
+        assert ok is False
+        assert victim.exists()
+        assert (victim / "important.txt").exists()
+
+    def test_legitimate_skill_uninstall_still_works(self, hub_setup):
+        """The guard must NOT block a normal skill directory inside SKILLS_DIR."""
+        skills_dir, hub_dir, _victim = hub_setup
+        legit = skills_dir / "category" / "my-skill"
+        legit.mkdir(parents=True)
+        (legit / "SKILL.md").write_text("test")
+
+        self._write_lock(hub_dir, {
+            "my-skill": {
+                "install_path": "category/my-skill",
+                "source": "https://example.com",
+                "trust_level": "community",
+                "version": "1.0",
+            },
+        })
+
+        ok, msg = uninstall_skill("my-skill")
+
+        assert ok is True
+        assert not legit.exists()
+
+
+# =============================================================================
+# Bundle / disk hash symmetry + filename inclusion
+# =============================================================================
+
+
+class TestBundleHashFilenameSensitivity:
+    """Hashes must change when filenames are swapped, even if combined
+    contents stay identical. ``bundle_content_hash`` (in-memory) and
+    ``content_hash`` (on-disk) must stay symmetric — they're used to
+    detect skill drift between an installed bundle and its source.
+    """
+
+    def _make_bundle(self, files: dict) -> SkillBundle:
+        return SkillBundle(
+            name="test",
+            files=files,
+            source="test",
+            identifier="test/test",
+            trust_level="community",
+        )
+
+    def test_filename_swap_changes_hash(self):
+        """Swapping content between SKILL.md and scripts/run.sh must
+        produce a different hash. Without the filename in the hash,
+        these two bundles would have looked identical."""
+        a = self._make_bundle({"SKILL.md": "hello", "scripts/run.sh": "world"})
+        b = self._make_bundle({"SKILL.md": "world", "scripts/run.sh": "hello"})
+        assert bundle_content_hash(a) != bundle_content_hash(b)
+
+    def test_identical_bundles_same_hash(self):
+        """Sanity: equal content + paths = equal hash."""
+        a = self._make_bundle({"SKILL.md": "x", "run.sh": "y"})
+        b = self._make_bundle({"SKILL.md": "x", "run.sh": "y"})
+        assert bundle_content_hash(a) == bundle_content_hash(b)
+
+    def test_disk_hash_changes_on_filename_swap(self, tmp_path):
+        """``content_hash`` on disk must also be filename-sensitive,
+        so it stays symmetric with ``bundle_content_hash``."""
+        skill_a = tmp_path / "a"
+        skill_a.mkdir()
+        (skill_a / "SKILL.md").write_text("hello")
+        (skill_a / "run.sh").write_text("world")
+
+        skill_b = tmp_path / "b"
+        skill_b.mkdir()
+        (skill_b / "SKILL.md").write_text("world")
+        (skill_b / "run.sh").write_text("hello")
+
+        # Different filename↔content mappings = different hashes.
+        assert content_hash(skill_a) != content_hash(skill_b)
+
+    def test_bundle_and_disk_hash_match(self, tmp_path):
+        """Symmetry contract: the same skill, expressed as a SkillBundle
+        and as a directory tree, must produce the same digest. If this
+        fails, ``check_for_skill_updates`` will flag every clean
+        install as drifted."""
+        skill_dir = tmp_path / "skill"
+        skill_dir.mkdir()
+        (skill_dir / "SKILL.md").write_text("hello")
+        (skill_dir / "scripts").mkdir()
+        (skill_dir / "scripts" / "run.sh").write_text("world")
+
+        bundle = self._make_bundle({
+            "SKILL.md": "hello",
+            "scripts/run.sh": "world",
+        })
+
+        assert bundle_content_hash(bundle) == content_hash(skill_dir)
+
+
+# =============================================================================
+# PairingStore.list_pending: must hold the lock
+# =============================================================================
+
+
+class TestListPendingLock:
+    """list_pending writes via _cleanup_expired. Without the lock,
+    a concurrent generate_code or approve_code can race against the
+    write, potentially clobbering a pending approval."""
+
+    def test_list_pending_acquires_lock(self, tmp_path):
+        """Source-grep contract: ``list_pending`` body must be wrapped
+        in ``with self._lock:``. If anyone unwraps it again, the TOCTOU
+        bug returns."""
+        import gateway.pairing as _pairing_mod
+        source = Path(_pairing_mod.__file__).read_text(encoding="utf-8")
+        # Find the list_pending function body and assert the lock
+        # context manager appears inside it. We grep the function
+        # source rather than runtime-introspect because the racy
+        # behaviour is hard to deterministically reproduce in a test.
+        lines = source.splitlines()
+        in_func = False
+        seen_lock = False
+        for line in lines:
+            if line.startswith("    def list_pending("):
+                in_func = True
+                continue
+            if in_func:
+                if line.startswith("    def "):
+                    break  # next function
+                if "with self._lock:" in line:
+                    seen_lock = True
+                    break
+        assert seen_lock, (
+            "list_pending must wrap its body in `with self._lock:` — "
+            "without it, _cleanup_expired's file write races with "
+            "concurrent generate_code/approve_code."
+        )
+
+    def test_list_pending_returns_correct_data(self, tmp_path):
+        """End-to-end smoke: even with the lock held, basic operation works."""
+        from gateway.pairing import PairingStore
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            store = PairingStore()
+            store.generate_code("telegram", "user1", "Alice")
+            pending = store.list_pending("telegram")
+        assert len(pending) == 1
+        assert pending[0]["user_id"] == "user1"
--- a/tests/tools/test_process_registry.py
+++ b/tests/tools/test_process_registry.py
@ -1007,3 +1007,163 @@ def test_drain_notifications_empty_queue():

    results = process_registry.drain_notifications()
    assert results == []
+
+
+# ---------------------------------------------------------------------------
+# _terminate_host_pid — cross-platform process-tree termination
+# ---------------------------------------------------------------------------
+
+
+class TestTerminateHostPidWindows:
+    """Windows branch uses ``taskkill /T /F`` — the documented MS tree-kill
+    primitive. We can't use psutil's ``children(recursive=True)`` /
+    ``.terminate()`` path on Windows because (1) Windows doesn't maintain
+    a Unix-style process tree so the walk is unreliable, and (2)
+    ``Process.terminate()`` on Windows is ``TerminateProcess()`` for the
+    target handle only, not the tree.
+    """
+
+    def test_windows_invokes_taskkill_with_tree_and_force_flags(self, monkeypatch):
+        """The Windows branch must shell out to ``taskkill /PID N /T /F``."""
+        from tools import process_registry as pr
+
+        captured = {}
+
+        def fake_run(args, **kwargs):
+            captured["args"] = args
+            captured["kwargs"] = kwargs
+            return MagicMock(returncode=0, stderr="", stdout="")
+
+        monkeypatch.setattr(pr, "_IS_WINDOWS", True)
+        monkeypatch.setattr(pr.subprocess, "run", fake_run)
+
+        pr.ProcessRegistry._terminate_host_pid(12345)
+
+        assert captured["args"][0] == "taskkill"
+        assert "/PID" in captured["args"]
+        assert "12345" in captured["args"]
+        assert "/T" in captured["args"], "Tree flag required to reach descendants"
+        assert "/F" in captured["args"], "Force flag required for headless Chromium"
+
+    def test_windows_falls_back_to_os_kill_when_taskkill_missing(self, monkeypatch):
+        """If ``taskkill.exe`` is somehow unavailable, fall back to a bare
+        ``os.kill(pid, SIGTERM)`` so we at least try to kill the parent."""
+        from tools import process_registry as pr
+
+        kill_calls = []
+
+        def fake_run(*args, **kwargs):
+            raise FileNotFoundError("taskkill not found")
+
+        def fake_kill(pid, sig):
+            kill_calls.append((pid, sig))
+
+        monkeypatch.setattr(pr, "_IS_WINDOWS", True)
+        monkeypatch.setattr(pr.subprocess, "run", fake_run)
+        monkeypatch.setattr(pr.os, "kill", fake_kill)
+
+        pr.ProcessRegistry._terminate_host_pid(12345)
+
+        assert kill_calls == [(12345, signal.SIGTERM)]
+
+    def test_windows_does_not_call_psutil(self, monkeypatch):
+        """The Windows branch must NOT exercise the psutil tree-walk
+        (it's unreliable on Windows — see the function docstring)."""
+        from tools import process_registry as pr
+        import psutil
+
+        psutil_calls = []
+
+        class _BoomProcess:
+            def __init__(self, pid):
+                psutil_calls.append(("Process", pid))
+
+            def children(self, recursive=False):
+                psutil_calls.append(("children", recursive))
+                return []
+
+            def terminate(self):
+                psutil_calls.append(("terminate",))
+
+        def fake_run(args, **kwargs):
+            return MagicMock(returncode=0, stderr="", stdout="")
+
+        monkeypatch.setattr(pr, "_IS_WINDOWS", True)
+        monkeypatch.setattr(pr.subprocess, "run", fake_run)
+        monkeypatch.setattr(psutil, "Process", _BoomProcess)
+
+        pr.ProcessRegistry._terminate_host_pid(12345)
+
+        assert psutil_calls == [], (
+            f"Windows branch must not touch psutil, but saw {psutil_calls!r}"
+        )
+
+
+class TestTerminateHostPidPosix:
+    """POSIX branch walks the tree via psutil and SIGTERMs children first."""
+
+    def test_posix_walks_tree_and_terminates_children_then_parent(self, monkeypatch):
+        from tools import process_registry as pr
+        import psutil
+
+        terminate_order = []
+
+        class _FakeChild:
+            def __init__(self, pid):
+                self.pid = pid
+
+            def terminate(self):
+                terminate_order.append(self.pid)
+
+        class _FakeParent:
+            def __init__(self, pid):
+                self.pid = pid
+
+            def children(self, recursive=False):
+                assert recursive is True
+                return [_FakeChild(101), _FakeChild(102), _FakeChild(103)]
+
+            def terminate(self):
+                terminate_order.append(self.pid)
+
+        monkeypatch.setattr(pr, "_IS_WINDOWS", False)
+        monkeypatch.setattr(psutil, "Process", _FakeParent)
+
+        pr.ProcessRegistry._terminate_host_pid(12345)
+
+        assert terminate_order == [101, 102, 103, 12345], (
+            "Children must be terminated before the parent"
+        )
+
+    def test_posix_no_such_process_swallowed(self, monkeypatch):
+        from tools import process_registry as pr
+        import psutil
+
+        def boom(pid):
+            raise psutil.NoSuchProcess(pid)
+
+        monkeypatch.setattr(pr, "_IS_WINDOWS", False)
+        monkeypatch.setattr(psutil, "Process", boom)
+
+        # Must not raise.
+        pr.ProcessRegistry._terminate_host_pid(999999999)
+
+    def test_posix_oserror_falls_back_to_os_kill(self, monkeypatch):
+        from tools import process_registry as pr
+        import psutil
+
+        def boom(pid):
+            raise PermissionError("can't read /proc")
+
+        kill_calls = []
+
+        def fake_kill(pid, sig):
+            kill_calls.append((pid, sig))
+
+        monkeypatch.setattr(pr, "_IS_WINDOWS", False)
+        monkeypatch.setattr(psutil, "Process", boom)
+        monkeypatch.setattr(pr.os, "kill", fake_kill)
+
+        pr.ProcessRegistry._terminate_host_pid(12345)
+
+        assert kill_calls == [(12345, signal.SIGTERM)]
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@ -28,16 +28,93 @@ def _reset_signal_scheduler():

 from gateway.config import Platform
 from tools.send_message_tool import (
-    _derive_forum_thread_name,
    _is_telegram_thread_not_found,
    _parse_target_ref,
-    _send_discord,
    _send_matrix_via_adapter,
    _send_signal,
    _send_telegram,
    _send_to_platform,
    send_message_tool,
 )
+# Discord helpers moved to the plugin in #24325.  Import from the new path
+# and provide a thin ``_send_discord(token, ...)`` shim that mirrors the
+# pre-migration signature so the existing test bodies keep working.
+from plugins.platforms.discord.adapter import (
+    _DISCORD_CHANNEL_TYPE_PROBE_CACHE,
+    _derive_forum_thread_name,
+    _probe_is_forum_cached,
+    _remember_channel_is_forum,
+    _standalone_send,
+)
+
+
+async def _send_discord(
+    token,
+    chat_id,
+    message,
+    *,
+    thread_id=None,
+    media_files=None,
+):
+    """Pre-migration ``(token, chat_id, message, …)`` adapter around the
+    plugin's ``_standalone_send(pconfig, …)``.  Lets test bodies continue
+    to call ``_send_discord("tok", ...)`` without rewriting every signature.
+    """
+    pconfig = SimpleNamespace(token=token, extra={})
+    return await _standalone_send(
+        pconfig,
+        chat_id,
+        message,
+        thread_id=thread_id,
+        media_files=media_files,
+    )
+
+
+def _discord_entry():
+    """Return the live Discord PlatformEntry, importing lazily so plugin
+    discovery is forced exactly once and patches survive across tests."""
+    from hermes_cli.plugins import discover_plugins
+    from gateway.platform_registry import platform_registry
+    discover_plugins()
+    return platform_registry.get("discord")
+
+
+class _patch_discord_sender:
+    """Patch the Discord registry entry's ``standalone_sender_fn`` with the
+    given mock and translate the production ``(pconfig, ...)`` call shape
+    back to the pre-migration ``(token, ...)`` shape the test mocks expect.
+
+    Use as a context manager:
+
+        send_mock = AsyncMock(return_value={...})
+        with _patch_discord_sender(send_mock):
+            asyncio.run(_send_to_platform(Platform.DISCORD, ...))
+        send_mock.assert_awaited_once_with("tok", "chat", "msg",
+                                           thread_id=None, media_files=[])
+    """
+
+    def __init__(self, mock):
+        self._mock = mock
+        self._entry = None
+        self._original = None
+
+    async def _adapter(self, pconfig, chat_id, message, *, thread_id=None, media_files=None):
+        token = getattr(pconfig, "token", None)
+        return await self._mock(
+            token, chat_id, message,
+            thread_id=thread_id, media_files=media_files,
+        )
+
+    def __enter__(self):
+        self._entry = _discord_entry()
+        self._original = self._entry.standalone_sender_fn
+        self._entry.standalone_sender_fn = self._adapter
+        return self._mock
+
+    def __exit__(self, exc_type, exc, tb):
+        if self._entry is not None:
+            self._entry.standalone_sender_fn = self._original
+        return False


 def _run_async_immediately(coro):
@ -300,6 +377,37 @@ class TestSendMessageTool:
            user_id="user-123",
        )

+    def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path):
+        config, telegram_cfg = _make_config()
+        secret = tmp_path / "secret.pdf"
+        secret.write_bytes(b"%PDF secret")
+
+        with patch("gateway.config.load_gateway_config", return_value=config), \
+             patch("tools.interrupt.is_interrupted", return_value=False), \
+             patch("model_tools._run_async", side_effect=_run_async_immediately), \
+             patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
+             patch("gateway.mirror.mirror_to_session", return_value=True):
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "telegram:12345",
+                        "message": f"hello\nMEDIA:{secret}",
+                    }
+                )
+            )
+
+        assert result["success"] is True
+        send_mock.assert_awaited_once_with(
+            Platform.TELEGRAM,
+            telegram_cfg,
+            "12345",
+            "hello",
+            thread_id=None,
+            media_files=[],
+            force_document=False,
+        )
+
    def test_top_level_send_failure_redacts_query_token(self):
        config, _telegram_cfg = _make_config()
        leaked = "very-secret-query-token-123456"
@ -446,7 +554,7 @@ class TestSendToPlatformChunking:
        """Messages exceeding the platform limit are split into multiple sends."""
        send = AsyncMock(return_value={"success": True, "message_id": "1"})
        long_msg = "word " * 1000  # ~5000 chars, well over Discord's 2000 limit
-        with patch("tools.send_message_tool._send_discord", send):
+        with _patch_discord_sender(send):
            result = asyncio.run(
                _send_to_platform(
                    Platform.DISCORD,
@ -1176,7 +1284,7 @@ class TestSendToPlatformDiscordThread:
        """Discord platform with thread_id passes it to _send_discord."""
        send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})

-        with patch("tools.send_message_tool._send_discord", send_mock):
+        with _patch_discord_sender(send_mock):
            result = asyncio.run(
                _send_to_platform(
                    Platform.DISCORD,
@ -1196,7 +1304,7 @@ class TestSendToPlatformDiscordThread:
        """Discord platform without thread_id passes None."""
        send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})

-        with patch("tools.send_message_tool._send_discord", send_mock):
+        with _patch_discord_sender(send_mock):
            result = asyncio.run(
                _send_to_platform(
                    Platform.DISCORD,
@ -1360,7 +1468,7 @@ class TestSendToPlatformDiscordMedia:
        # A message long enough to get chunked (Discord limit is 2000)
        long_msg = "A" * 1900 + " " + "B" * 1900

-        with patch("tools.send_message_tool._send_discord", side_effect=mock_send_discord):
+        with _patch_discord_sender(AsyncMock(side_effect=mock_send_discord)):
            result = asyncio.run(
                _send_to_platform(
                    Platform.DISCORD,
@ -1380,7 +1488,7 @@ class TestSendToPlatformDiscordMedia:
        """Short message (single chunk) gets media_files directly."""
        send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})

-        with patch("tools.send_message_tool._send_discord", send_mock):
+        with _patch_discord_sender(send_mock):
            result = asyncio.run(
                _send_to_platform(
                    Platform.DISCORD,
@ -1618,7 +1726,7 @@ class TestSendToPlatformDiscordForum:
        """Discord messages are routed through _send_discord, which handles forum detection."""
        send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})

-        with patch("tools.send_message_tool._send_discord", send_mock):
+        with _patch_discord_sender(send_mock):
            result = asyncio.run(
                _send_to_platform(
                    Platform.DISCORD,
@ -1637,7 +1745,7 @@ class TestSendToPlatformDiscordForum:
        """Thread ID is still passed through when sending to Discord."""
        send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})

-        with patch("tools.send_message_tool._send_discord", send_mock):
+        with _patch_discord_sender(send_mock):
            result = asyncio.run(
                _send_to_platform(
                    Platform.DISCORD,
@ -1775,11 +1883,11 @@ class TestForumProbeCache:
    """_DISCORD_CHANNEL_TYPE_PROBE_CACHE memoizes forum detection results."""

    def setup_method(self):
-        from tools import send_message_tool as smt
-        smt._DISCORD_CHANNEL_TYPE_PROBE_CACHE.clear()
+        from plugins.platforms.discord import adapter as discord_adapter
+        discord_adapter._DISCORD_CHANNEL_TYPE_PROBE_CACHE.clear()

    def test_cache_round_trip(self):
-        from tools.send_message_tool import (
+        from plugins.platforms.discord.adapter import (
            _probe_is_forum_cached,
            _remember_channel_is_forum,
        )
@ -1819,7 +1927,7 @@ class TestForumProbeCache:
        thread_session.post = MagicMock(return_value=thread_resp)

        # Two _send_discord calls: first does probe + thread-create; second should skip probe
-        from tools import send_message_tool as smt
+        from plugins.platforms.discord import adapter as discord_adapter

        sessions_created = []

@ -1837,7 +1945,7 @@ class TestForumProbeCache:
        with patch("aiohttp.ClientSession", side_effect=session_factory):
            result1 = asyncio.run(_send_discord("tok", "ch1", "first"))
        assert result1["success"] is True
-        assert smt._probe_is_forum_cached("ch1") is True
+        assert discord_adapter._probe_is_forum_cached("ch1") is True

        # Second call: cache hits, no new probe session needed. We need to only
        # return thread_session now since probe is skipped.
@ -2575,4 +2683,3 @@ class TestSendTelegramThreadNotFoundRetry:
        finally:
            if media_path and os.path.exists(media_path):
                os.unlink(media_path)
-
--- a/tests/tools/test_skills_ast_audit.py
+++ b/tests/tools/test_skills_ast_audit.py
@ -0,0 +1,103 @@
+"""Tests for tools.skills_ast_audit — opt-in AST diagnostic scanner."""
+
+import sys
+from pathlib import Path
+
+from tools.skills_ast_audit import ast_scan_path, format_ast_report
+
+
+def _pids(findings):
+    return [pid for (_f, _l, pid, _d) in findings]
+
+
+def test_bypass_payload_detected(tmp_path):
+    """The exact bypass shape from #7072 is caught."""
+    f = tmp_path / "exfil.py"
+    f.write_text(
+        "import importlib\n"
+        "parts = ['o', 's']\n"
+        "m = importlib.import_module(''.join(parts))\n"
+        "e = m.__dict__[''.join(['e','n','v'])]\n"
+    )
+    pids = _pids(ast_scan_path(f))
+    assert "dynamic_import" in pids
+    assert "importlib_import" in pids
+    assert "dict_access" in pids
+
+
+def test_syntax_error_does_not_crash(tmp_path):
+    f = tmp_path / "bad.py"
+    f.write_text("def broken(\n")
+    assert ast_scan_path(f) == []
+
+
+def test_recursion_error_does_not_crash(tmp_path):
+    f = tmp_path / "deep.py"
+    f.write_text("a" + ".x" * 5000 + "\n")
+    orig = sys.getrecursionlimit()
+    sys.setrecursionlimit(200)
+    try:
+        result = ast_scan_path(f)
+    finally:
+        sys.setrecursionlimit(orig)
+    assert isinstance(result, list)
+
+
+def test_importer_lookalike_not_flagged(tmp_path):
+    """`import importer` must NOT match — dot-bounded prefix."""
+    f = tmp_path / "ok.py"
+    f.write_text("import importer\nfrom importer import x\n")
+    assert _pids(ast_scan_path(f)) == []
+
+
+def test_literal_dunder_import_not_flagged(tmp_path):
+    """__import__('os') with a literal is not flagged (regex catches those)."""
+    f = tmp_path / "ok.py"
+    f.write_text("m = __import__('os')\n")
+    assert "dynamic_import_computed" not in _pids(ast_scan_path(f))
+
+
+def test_non_python_file_returns_empty(tmp_path):
+    f = tmp_path / "script.sh"
+    f.write_text("import importlib\n")
+    assert ast_scan_path(f) == []
+
+
+def test_directory_scans_recursively_and_skips_cache_dirs(tmp_path):
+    skill = tmp_path / "s"
+    skill.mkdir()
+    (skill / "main.py").write_text("import importlib\n")
+    (skill / "sub").mkdir()
+    (skill / "sub" / "u.py").write_text("from importlib.util import find_spec\n")
+    for d in ("__pycache__", ".venv", "venv", "node_modules"):
+        ignored = skill / d
+        ignored.mkdir()
+        (ignored / "junk.py").write_text("import importlib\n")
+    pids = _pids(ast_scan_path(skill))
+    assert pids.count("importlib_import") == 2
+
+
+def test_missing_path_returns_empty(tmp_path):
+    assert ast_scan_path(tmp_path / "does_not_exist") == []
+
+
+def test_dynamic_getattr_and_dict_access_detected(tmp_path):
+    f = tmp_path / "g.py"
+    f.write_text("name = 'x'\nv = getattr(o, name)\nv = o.__dict__[name]\n")
+    pids = _pids(ast_scan_path(f))
+    assert "dynamic_getattr" in pids
+    assert "dict_access" in pids
+
+
+def test_format_report_empty():
+    assert "No dynamic" in format_ast_report([])
+
+
+def test_format_report_with_findings():
+    findings = [
+        ("a.py", 1, "importlib_import", "import importlib — ..."),
+        ("a.py", 3, "dynamic_import", "importlib.import_module() — ..."),
+    ]
+    out = format_ast_report(findings, skill_name="test")
+    assert "test" in out and "a.py" in out and "L1" in out and "L3" in out
+    assert "diagnostic hints" in out
--- a/tests/tools/test_skills_guard.py
+++ b/tests/tools/test_skills_guard.py
@ -84,13 +84,13 @@ class TestDetermineVerdict:
        f = Finding("x", "high", "network", "f.py", 1, "m", "d")
        assert _determine_verdict([f]) == "caution"

-    def test_medium_finding_caution(self):
+    def test_medium_finding_safe(self):
        f = Finding("x", "medium", "structural", "f.py", 1, "m", "d")
-        assert _determine_verdict([f]) == "caution"
+        assert _determine_verdict([f]) == "safe"

-    def test_low_finding_caution(self):
+    def test_low_finding_safe(self):
        f = Finding("x", "low", "obfuscation", "f.py", 1, "m", "d")
-        assert _determine_verdict([f]) == "caution"
+        assert _determine_verdict([f]) == "safe"


 # ---------------------------------------------------------------------------
@ -145,21 +145,46 @@ class TestShouldAllowInstall:
        allowed, _ = should_allow_install(self._result("community", "dangerous", f), force=False)
        assert allowed is False

-    def test_force_overrides_dangerous_for_community(self):
+    def test_force_does_not_override_dangerous_for_community(self):
        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
        allowed, reason = should_allow_install(
            self._result("community", "dangerous", f), force=True
        )
-        assert allowed is True
-        assert "Force-installed" in reason
+        assert allowed is False
+        assert "Blocked" in reason
+        # Error message MUST explain why --force didn't work, not invite a retry.
+        assert "does not override" in reason
+        assert "Use --force to override" not in reason

-    def test_force_overrides_dangerous_for_trusted(self):
+    def test_force_does_not_override_dangerous_for_trusted_message(self):
        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
        allowed, reason = should_allow_install(
            self._result("trusted", "dangerous", f), force=True
        )
-        assert allowed is True
-        assert "Force-installed" in reason
+        assert allowed is False
+        assert "does not override" in reason
+        assert "Use --force to override" not in reason
+
+    def test_non_dangerous_block_keeps_force_hint(self):
+        # When --force CAN override the block, the error message must still
+        # point to it. Use builtin trust + dangerous to land in the block
+        # branch without triggering the dangerous-specific message.
+        f = [Finding("x", "high", "network", "f", 1, "m", "d")]
+        # Construct a path where decision == block but verdict != dangerous.
+        # community + caution = block per current INSTALL_POLICY.
+        allowed, reason = should_allow_install(
+            self._result("community", "caution", f), force=False
+        )
+        assert allowed is False
+        assert "Use --force to override" in reason
+
+    def test_force_does_not_override_dangerous_for_trusted(self):
+        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
+        allowed, reason = should_allow_install(
+            self._result("trusted", "dangerous", f), force=True
+        )
+        assert allowed is False
+        assert "Blocked" in reason

    # -- agent-created policy --

--- a/tests/tools/test_ssh_bulk_upload.py
+++ b/tests/tools/test_ssh_bulk_upload.py
@ -91,7 +91,7 @@ class TestSSHBulkUpload:
        assert "/home/testuser/.hermes/credentials" in mkdir_str

    def test_staging_symlinks_mirror_remote_layout(self, mock_env, tmp_path):
-        """Symlinks in staging dir should mirror the remote path structure."""
+        """Symlinks in staging dir should mirror the .hermes-relative layout."""
        f1 = tmp_path / "local_a.txt"
        f1.write_text("content a")

@ -107,9 +107,7 @@ class TestSSHBulkUpload:
                c_idx = cmd.index("-C")
                staging_dir = cmd[c_idx + 1]
                # Check the symlink exists
-                expected = os.path.join(
-                    staging_dir, "home/testuser/.hermes/skills/my_skill.md"
-                )
+                expected = os.path.join(staging_dir, "skills/my_skill.md")
                staging_paths.append(expected)
                assert os.path.islink(expected), f"Expected symlink at {expected}"
                assert os.readlink(expected) == os.path.abspath(str(f1))
@ -166,14 +164,42 @@ class TestSSHBulkUpload:
        assert "-" in tar_cmd  # stdout
        assert "-C" in tar_cmd

-        # ssh: extract from stdin at /, preserving existing dir modes (#17767)
+        # ssh: extract from stdin at ~/.hermes, preserving existing dir modes (#17767)
        ssh_str = " ".join(ssh_cmd)
        assert "ssh" in ssh_str
        assert "tar xf -" in ssh_str
        assert "--no-overwrite-dir" in ssh_str
-        assert "-C /" in ssh_str
+        assert "-C /home/testuser/.hermes" in ssh_str
        assert "testuser@example.com" in ssh_str

+    def test_bulk_upload_never_stages_remote_home_prefix(self, mock_env, tmp_path):
+        """Regression: do not archive /home/<user> path components."""
+        f1 = tmp_path / "nested.txt"
+        f1.write_text("nested")
+        files = [(str(f1), "/home/testuser/.hermes/cache/nested.txt")]
+
+        def capture_tar_cmd(cmd, **kwargs):
+            if cmd[0] == "tar":
+                c_idx = cmd.index("-C")
+                staging_dir = cmd[c_idx + 1]
+                assert not os.path.exists(os.path.join(staging_dir, "home"))
+                expected = os.path.join(staging_dir, "cache/nested.txt")
+                assert os.path.islink(expected)
+
+            mock = MagicMock()
+            mock.stdout = MagicMock()
+            mock.returncode = 0
+            mock.poll.return_value = 0
+            mock.communicate.return_value = (b"", b"")
+            mock.stderr = MagicMock()
+            mock.stderr.read.return_value = b""
+            return mock
+
+        with patch.object(subprocess, "run",
+                          return_value=subprocess.CompletedProcess([], 0)), \
+             patch.object(subprocess, "Popen", side_effect=capture_tar_cmd):
+            mock_env._ssh_bulk_upload(files)
+
    def test_mkdir_failure_raises(self, mock_env, tmp_path):
        """mkdir failure should raise RuntimeError before tar pipe."""
        f1 = tmp_path / "y.txt"
--- a/tests/tools/test_tirith_security.py
+++ b/tests/tools/test_tirith_security.py
@ -831,7 +831,8 @@ class TestDiskFailureMarker:
        with patch("tools.tirith_security._failure_marker_path", return_value=marker):
            from tools.tirith_security import _mark_install_failed, _is_install_failed_on_disk
            _mark_install_failed("cosign_missing")
-            assert _is_install_failed_on_disk()  # cosign still absent
+            with patch("tools.tirith_security.shutil.which", return_value=None):
+                assert _is_install_failed_on_disk()  # cosign still absent

            # Now cosign appears on PATH
            with patch("tools.tirith_security.shutil.which", return_value="/usr/local/bin/cosign"):
--- a/tests/tools/test_transcription.py
+++ b/tests/tools/test_transcription.py
@ -23,6 +23,9 @@ def _fake_faster_whisper_module(mock_model):
 # ---------------------------------------------------------------------------


+pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
+
+
@pytest.fixture(autouse=True)
 def _clear_openai_env(monkeypatch):
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
--- a/tests/tools/test_transcription_dotenv_fallback.py
+++ b/tests/tools/test_transcription_dotenv_fallback.py
@ -12,6 +12,9 @@ from unittest.mock import MagicMock, patch
 import pytest


+pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
+
+
@pytest.fixture(autouse=True)
 def isolate_env(monkeypatch):
    """Strip every STT-related env var so the test really exercises the
--- a/tests/tools/test_transcription_tools.py
+++ b/tests/tools/test_transcription_tools.py
@ -42,6 +42,9 @@ def sample_ogg(tmp_path):
    return str(ogg_path)


+pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
+
+
@pytest.fixture(autouse=True)
 def clean_env(monkeypatch):
    """Ensure no real API keys leak into tests."""
--- a/tests/tools/test_voice_mode.py
+++ b/tests/tools/test_voice_mode.py
@ -10,6 +10,18 @@ from unittest.mock import MagicMock, patch
 import pytest


+def _non_wsl_proc_version(real_open):
+    """Return an open() shim that makes host WSL detection deterministic."""
+    def _fake_open(file, *args, **kwargs):
+        if file == "/proc/version":
+            from io import StringIO
+
+            return StringIO("Linux test-kernel")
+        return real_open(file, *args, **kwargs)
+
+    return _fake_open
+
+
 # ============================================================================
 # Fixtures
 # ============================================================================
@ -68,6 +80,7 @@ class TestDetectAudioEnvironment:
        monkeypatch.delenv("SSH_CONNECTION", raising=False)
        monkeypatch.setattr("tools.voice_mode._import_audio",
                            lambda: (MagicMock(), MagicMock()))
+        monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open))

        from tools.voice_mode import detect_audio_environment
        result = detect_audio_environment()
@ -225,6 +238,7 @@ class TestDetectAudioEnvironment:
        monkeypatch.setattr("tools.voice_mode.shutil.which", lambda cmd: "/data/data/com.termux/files/usr/bin/termux-microphone-record" if cmd == "termux-microphone-record" else None)
        monkeypatch.setattr("tools.voice_mode._termux_api_app_installed", lambda: True)
        monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (_ for _ in ()).throw(ImportError("no audio libs")))
+        monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open))

        from tools.voice_mode import detect_audio_environment
        result = detect_audio_environment()