diff --git a/gateway/run.py b/gateway/run.py
index 7a45be62d..56518be65 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1858,6 +1858,11 @@ class GatewayRunner:
         if _quick_key in self._running_agents and _stale_ts:
             _stale_age = time.time() - _stale_ts
             _stale_agent = self._running_agents.get(_quick_key)
+            # Never evict the pending sentinel — it was just placed moments
+            # ago during the async setup phase before the real agent is
+            # created.  Sentinels have no get_activity_summary(), so the
+            # idle check below would always evaluate to inf >= timeout and
+            # immediately evict them, racing with the setup path.
             _stale_idle = float("inf")  # assume idle if we can't check
             _stale_detail = ""
             if _stale_agent and hasattr(_stale_agent, "get_activity_summary"):
@@ -1876,8 +1881,11 @@ class GatewayRunner:
             # cases where the agent object was garbage-collected).
             _wall_ttl = max(_raw_stale_timeout * 10, 7200) if _raw_stale_timeout > 0 else float("inf")
             _should_evict = (
-                (_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout)
-                or _stale_age > _wall_ttl
+                _stale_agent is not _AGENT_PENDING_SENTINEL
+                and (
+                    (_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout)
+                    or _stale_age > _wall_ttl
+                )
             )
             if _should_evict:
                 logger.warning(
diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py
index 09f0ab959..0de00b736 100644
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@@ -2,12 +2,54 @@
 import asyncio
 import json
 import re
+import sys
+import types
 import pytest
 from unittest.mock import MagicMock, patch, AsyncMock
 
 from gateway.config import Platform, PlatformConfig
 
 
+def _make_fake_nio():
+    """Create a lightweight fake ``nio`` module with real response classes.
+
+    Tests that call production methods doing ``import nio`` / ``isinstance(resp, nio.XxxResponse)``
+    need real classes (not MagicMock auto-attributes) to satisfy isinstance checks.
+    Use via ``patch.dict("sys.modules", {"nio": _make_fake_nio()})``.
+    """
+    mod = types.ModuleType("nio")
+
+    class RoomSendResponse:
+        def __init__(self, event_id="$fake"):
+            self.event_id = event_id
+
+    class RoomRedactResponse:
+        pass
+
+    class RoomCreateResponse:
+        def __init__(self, room_id="!fake:example.org"):
+            self.room_id = room_id
+
+    class RoomInviteResponse:
+        pass
+
+    class UploadResponse:
+        def __init__(self, content_uri="mxc://example.org/fake"):
+            self.content_uri = content_uri
+
+    # Minimal Api stub for code that checks nio.Api.RoomPreset
+    class _Api:
+        pass
+    mod.Api = _Api
+
+    mod.RoomSendResponse = RoomSendResponse
+    mod.RoomRedactResponse = RoomRedactResponse
+    mod.RoomCreateResponse = RoomCreateResponse
+    mod.RoomInviteResponse = RoomInviteResponse
+    mod.UploadResponse = UploadResponse
+    return mod
+
+
 # ---------------------------------------------------------------------------
 # Platform & Config
 # ---------------------------------------------------------------------------
@@ -1450,7 +1492,10 @@ class TestMatrixEncryptedMedia:
 
     @pytest.mark.asyncio
     async def test_on_room_message_media_decrypts_encrypted_image_and_passes_local_path(self):
-        from nio.crypto.attachments import encrypt_attachment
+        try:
+            from nio.crypto.attachments import encrypt_attachment
+        except (ImportError, ModuleNotFoundError):
+            pytest.skip("matrix-nio[e2e] required for encryption tests")
 
         adapter = _make_adapter()
         adapter._user_id = "@bot:example.org"
@@ -1518,7 +1563,10 @@ class TestMatrixEncryptedMedia:
 
     @pytest.mark.asyncio
     async def test_on_room_message_media_decrypts_encrypted_voice_and_caches_audio(self):
-        from nio.crypto.attachments import encrypt_attachment
+        try:
+            from nio.crypto.attachments import encrypt_attachment
+        except (ImportError, ModuleNotFoundError):
+            pytest.skip("matrix-nio[e2e] required for encryption tests")
 
         adapter = _make_adapter()
         adapter._user_id = "@bot:example.org"
@@ -1587,7 +1635,10 @@ class TestMatrixEncryptedMedia:
 
     @pytest.mark.asyncio
     async def test_on_room_message_media_decrypts_encrypted_file_and_caches_document(self):
-        from nio.crypto.attachments import encrypt_attachment
+        try:
+            from nio.crypto.attachments import encrypt_attachment
+        except (ImportError, ModuleNotFoundError):
+            pytest.skip("matrix-nio[e2e] required for encryption tests")
 
         adapter = _make_adapter()
         adapter._user_id = "@bot:example.org"
@@ -1883,14 +1934,15 @@ class TestMatrixReactions:
     @pytest.mark.asyncio
     async def test_send_reaction(self):
         """_send_reaction should call room_send with m.reaction."""
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
         mock_client = MagicMock()
         mock_client.room_send = AsyncMock(
-            return_value=MagicMock(spec=nio.RoomSendResponse)
+            return_value=fake_nio.RoomSendResponse("$reaction1")
         )
         self.adapter._client = mock_client
 
-        result = await self.adapter._send_reaction("!room:ex", "$event1", "👍")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter._send_reaction("!room:ex", "$event1", "👍")
         assert result is True
         mock_client.room_send.assert_called_once()
         args = mock_client.room_send.call_args
@@ -1902,7 +1954,8 @@ class TestMatrixReactions:
     @pytest.mark.asyncio
     async def test_send_reaction_no_client(self):
         self.adapter._client = None
-        result = await self.adapter._send_reaction("!room:ex", "$ev", "👍")
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter._send_reaction("!room:ex", "$ev", "👍")
         assert result is False
 
     @pytest.mark.asyncio
@@ -1999,21 +2052,23 @@ class TestMatrixRedaction:
 
     @pytest.mark.asyncio
     async def test_redact_message(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
         mock_client = MagicMock()
         mock_client.room_redact = AsyncMock(
-            return_value=MagicMock(spec=nio.RoomRedactResponse)
+            return_value=fake_nio.RoomRedactResponse()
         )
         self.adapter._client = mock_client
 
-        result = await self.adapter.redact_message("!room:ex", "$ev1", "oops")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.redact_message("!room:ex", "$ev1", "oops")
         assert result is True
         mock_client.room_redact.assert_called_once()
 
     @pytest.mark.asyncio
     async def test_redact_no_client(self):
         self.adapter._client = None
-        result = await self.adapter.redact_message("!room:ex", "$ev1")
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter.redact_message("!room:ex", "$ev1")
         assert result is False
 
 
@@ -2027,33 +2082,35 @@ class TestMatrixRoomManagement:
 
     @pytest.mark.asyncio
     async def test_create_room(self):
-        nio = pytest.importorskip("nio")
-        mock_resp = MagicMock(spec=nio.RoomCreateResponse)
-        mock_resp.room_id = "!new:example.org"
+        fake_nio = _make_fake_nio()
+        mock_resp = fake_nio.RoomCreateResponse(room_id="!new:example.org")
         mock_client = MagicMock()
         mock_client.room_create = AsyncMock(return_value=mock_resp)
         self.adapter._client = mock_client
 
-        room_id = await self.adapter.create_room(name="Test Room", topic="A test")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            room_id = await self.adapter.create_room(name="Test Room", topic="A test")
         assert room_id == "!new:example.org"
         assert "!new:example.org" in self.adapter._joined_rooms
 
     @pytest.mark.asyncio
     async def test_invite_user(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
         mock_client = MagicMock()
         mock_client.room_invite = AsyncMock(
-            return_value=MagicMock(spec=nio.RoomInviteResponse)
+            return_value=fake_nio.RoomInviteResponse()
         )
         self.adapter._client = mock_client
 
-        result = await self.adapter.invite_user("!room:ex", "@user:ex")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.invite_user("!room:ex", "@user:ex")
         assert result is True
 
     @pytest.mark.asyncio
     async def test_create_room_no_client(self):
         self.adapter._client = None
-        result = await self.adapter.create_room()
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter.create_room()
         assert result is None
 
 
@@ -2099,28 +2156,28 @@ class TestMatrixMessageTypes:
 
     @pytest.mark.asyncio
     async def test_send_emote(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
         mock_client = MagicMock()
-        mock_resp = MagicMock(spec=nio.RoomSendResponse)
-        mock_resp.event_id = "$emote1"
+        mock_resp = fake_nio.RoomSendResponse(event_id="$emote1")
         mock_client.room_send = AsyncMock(return_value=mock_resp)
         self.adapter._client = mock_client
 
-        result = await self.adapter.send_emote("!room:ex", "waves hello")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.send_emote("!room:ex", "waves hello")
         assert result.success is True
         call_args = mock_client.room_send.call_args[0]
         assert call_args[2]["msgtype"] == "m.emote"
 
     @pytest.mark.asyncio
     async def test_send_notice(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
         mock_client = MagicMock()
-        mock_resp = MagicMock(spec=nio.RoomSendResponse)
-        mock_resp.event_id = "$notice1"
+        mock_resp = fake_nio.RoomSendResponse(event_id="$notice1")
         mock_client.room_send = AsyncMock(return_value=mock_resp)
         self.adapter._client = mock_client
 
-        result = await self.adapter.send_notice("!room:ex", "System message")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.send_notice("!room:ex", "System message")
         assert result.success is True
         call_args = mock_client.room_send.call_args[0]
         assert call_args[2]["msgtype"] == "m.notice"
@@ -2128,5 +2185,6 @@ class TestMatrixMessageTypes:
     @pytest.mark.asyncio
     async def test_send_emote_empty_text(self):
         self.adapter._client = MagicMock()
-        result = await self.adapter.send_emote("!room:ex", "")
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter.send_emote("!room:ex", "")
         assert result.success is False
diff --git a/tests/gateway/test_matrix_voice.py b/tests/gateway/test_matrix_voice.py
index 79f0947f6..93d56caf1 100644
--- a/tests/gateway/test_matrix_voice.py
+++ b/tests/gateway/test_matrix_voice.py
@@ -1,10 +1,18 @@
 """Tests for Matrix voice message support (MSC3245)."""
 import io
+import types
 
 import pytest
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch
 
-nio = pytest.importorskip("nio", reason="matrix-nio not installed")
+# Try importing real nio; skip entire file if not available.
+# A MagicMock in sys.modules (from another test) is not the real package.
+try:
+    import nio as _nio_probe
+    if not isinstance(_nio_probe, types.ModuleType) or not hasattr(_nio_probe, "__file__"):
+        pytest.skip("nio in sys.modules is a mock, not the real package", allow_module_level=True)
+except ImportError:
+    pytest.skip("matrix-nio not installed", allow_module_level=True)
 
 from gateway.platforms.base import MessageType
 
diff --git a/tests/gateway/test_platform_reconnect.py b/tests/gateway/test_platform_reconnect.py
index 68dfd2044..566742723 100644
--- a/tests/gateway/test_platform_reconnect.py
+++ b/tests/gateway/test_platform_reconnect.py
@@ -59,6 +59,7 @@ def _make_runner():
     runner._honcho_managers = {}
     runner._honcho_configs = {}
     runner._shutdown_all_gateway_honcho = lambda: None
+    runner.session_store = MagicMock()
     return runner
 
 
diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py
index 427718c95..ff21cdef8 100644
--- a/tests/gateway/test_session_race_guard.py
+++ b/tests/gateway/test_session_race_guard.py
@@ -36,11 +36,16 @@ def _make_runner():
     )
     runner.adapters = {Platform.TELEGRAM: _FakeAdapter()}
     runner._running_agents = {}
+    runner._running_agents_ts = {}
     runner._pending_messages = {}
     runner._pending_approvals = {}
     runner._voice_mode = {}
     runner._background_tasks = set()
     runner._is_user_authorized = lambda _source: True
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    runner.session_store = MagicMock()
+    runner.delivery_router = MagicMock()
     return runner
 
 
diff --git a/tests/hermes_cli/test_setup_openclaw_migration.py b/tests/hermes_cli/test_setup_openclaw_migration.py
index 0991b6d1b..b956f1fe6 100644
--- a/tests/hermes_cli/test_setup_openclaw_migration.py
+++ b/tests/hermes_cli/test_setup_openclaw_migration.py
@@ -184,6 +184,8 @@ class TestSetupWizardOpenclawIntegration:
             patch("hermes_cli.auth.get_active_provider", return_value=None),
             # User presses Enter to start
             patch("builtins.input", return_value=""),
+            # Select "Full setup" (index 1) so we exercise the full path
+            patch.object(setup_mod, "prompt_choice", return_value=1),
             # Mock the migration offer
             patch.object(
                 setup_mod, "_offer_openclaw_migration", return_value=False
@@ -196,6 +198,7 @@ class TestSetupWizardOpenclawIntegration:
             patch.object(setup_mod, "setup_tools"),
             patch.object(setup_mod, "save_config"),
             patch.object(setup_mod, "_print_setup_summary"),
+            patch.object(setup_mod, "_offer_launch_chat"),
         ):
             setup_mod.run_setup_wizard(args)
 
@@ -218,6 +221,7 @@ class TestSetupWizardOpenclawIntegration:
             patch.object(setup_mod, "is_interactive_stdin", return_value=True),
             patch("hermes_cli.auth.get_active_provider", return_value=None),
             patch("builtins.input", return_value=""),
+            patch.object(setup_mod, "prompt_choice", return_value=1),
             patch.object(setup_mod, "_offer_openclaw_migration", return_value=True),
             patch.object(setup_mod, "setup_model_provider"),
             patch.object(setup_mod, "setup_terminal_backend"),
@@ -226,6 +230,7 @@ class TestSetupWizardOpenclawIntegration:
             patch.object(setup_mod, "setup_tools"),
             patch.object(setup_mod, "save_config"),
             patch.object(setup_mod, "_print_setup_summary"),
+            patch.object(setup_mod, "_offer_launch_chat"),
         ):
             setup_mod.run_setup_wizard(args)
 
@@ -249,6 +254,7 @@ class TestSetupWizardOpenclawIntegration:
             patch.object(setup_mod, "is_interactive_stdin", return_value=True),
             patch("hermes_cli.auth.get_active_provider", return_value=None),
             patch("builtins.input", return_value=""),
+            patch.object(setup_mod, "prompt_choice", return_value=1),
             patch.object(setup_mod, "_offer_openclaw_migration", return_value=True),
             patch.object(setup_mod, "setup_model_provider") as setup_model_provider,
             patch.object(setup_mod, "setup_terminal_backend"),
@@ -257,6 +263,7 @@ class TestSetupWizardOpenclawIntegration:
             patch.object(setup_mod, "setup_tools"),
             patch.object(setup_mod, "save_config"),
             patch.object(setup_mod, "_print_setup_summary"),
+            patch.object(setup_mod, "_offer_launch_chat"),
         ):
             setup_mod.run_setup_wizard(args)
 
@@ -438,6 +445,7 @@ class TestSetupWizardSkipsConfiguredSections:
             patch.object(setup_mod, "is_interactive_stdin", return_value=True),
             patch("hermes_cli.auth.get_active_provider", return_value=None),
             patch("builtins.input", return_value=""),
+            patch.object(setup_mod, "prompt_choice", return_value=1),
             # Migration succeeds and flips the env_side flag
             patch.object(
                 setup_mod, "_offer_openclaw_migration",
diff --git a/tests/test_anthropic_oauth_flow.py b/tests/test_anthropic_oauth_flow.py
index 3b52831aa..61cd6155a 100644
--- a/tests/test_anthropic_oauth_flow.py
+++ b/tests/test_anthropic_oauth_flow.py
@@ -40,6 +40,7 @@ def test_run_anthropic_oauth_flow_manual_token_still_persists(tmp_path, monkeypa
     monkeypatch.setattr("agent.anthropic_adapter.read_claude_code_credentials", lambda: None)
     monkeypatch.setattr("agent.anthropic_adapter.is_claude_code_token_valid", lambda creds: False)
     monkeypatch.setattr("builtins.input", lambda _prompt="": "sk-ant-oat01-manual-token")
+    monkeypatch.setattr("getpass.getpass", lambda _prompt="": "sk-ant-oat01-manual-token")
 
     from hermes_cli.main import _run_anthropic_oauth_flow
 
diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py
index 53e485027..bd78a98ea 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -538,7 +538,7 @@ def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):
         return "openrouter"
 
     monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider)
-    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1)
+    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices, **kwargs: len(choices) - 1)
     monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})())
 
     hermes_main.cmd_model(SimpleNamespace())
@@ -579,6 +579,7 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
     # "Use this model? [Y/n]:" — confirm with Enter, then context length.
     answers = iter(["http://localhost:8000", "local-key", "", ""])
     monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
+    monkeypatch.setattr("getpass.getpass", lambda _prompt="": next(answers))
 
     hermes_main._model_flow_custom({})
     output = capsys.readouterr().out
@@ -601,7 +602,7 @@ def test_cmd_model_forwards_nous_login_tls_options(monkeypatch):
     monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None)
     monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda requested, **kwargs: "nous")
     monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider_id: None)
-    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: 0)
+    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices, **kwargs: 0)
 
     captured = {}
 
diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py
index de33a0b91..354c95dde 100644
--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@@ -152,11 +152,22 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
     runner._provider_routing = {}
     runner._fallback_model = None
     runner._running_agents = {}
+    runner._smart_model_routing = {}
     from unittest.mock import MagicMock, AsyncMock
     runner.hooks = MagicMock()
     runner.hooks.emit = AsyncMock()
     runner.hooks.loaded_hooks = []
     runner._session_db = None
+    # Ensure model resolution returns the codex model even if xdist
+    # leaked env vars cleared HERMES_MODEL.
+    monkeypatch.setattr(
+        gateway_run.GatewayRunner,
+        "_resolve_turn_agent_config",
+        lambda self, msg, model, runtime: {
+            "model": model or "gpt-5.3-codex",
+            "runtime": runtime,
+        },
+    )
 
     source = SessionSource(
         platform=Platform.LOCAL,
diff --git a/tests/test_gemini_provider.py b/tests/test_gemini_provider.py
index d0cba5d63..b448ca513 100644
--- a/tests/test_gemini_provider.py
+++ b/tests/test_gemini_provider.py
@@ -171,7 +171,11 @@ class TestGeminiModelNormalization:
 
 class TestGeminiContextLength:
     def test_gemma_4_31b_context(self):
-        ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
+        # Mock external API lookups to test against hardcoded defaults
+        # (models.dev and OpenRouter may return different values like 262144).
+        with patch("agent.models_dev.lookup_models_dev_context", return_value=None), \
+             patch("agent.model_metadata.fetch_model_metadata", return_value={}):
+            ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
         assert ctx == 256000
 
     def test_gemma_4_26b_context(self):
diff --git a/tests/test_hermes_logging.py b/tests/test_hermes_logging.py
index 7b4004ef6..5b40e6323 100644
--- a/tests/test_hermes_logging.py
+++ b/tests/test_hermes_logging.py
@@ -14,14 +14,29 @@ import hermes_logging
 @pytest.fixture(autouse=True)
 def _reset_logging_state():
     """Reset the module-level sentinel and clean up root logger handlers
-    added by setup_logging() so tests don't leak state."""
+    added by setup_logging() so tests don't leak state.
+
+    Under xdist (-n auto) other test modules may have called setup_logging()
+    in the same worker process, leaving RotatingFileHandlers on the root
+    logger.  We strip ALL RotatingFileHandlers before each test so the count
+    assertions are stable regardless of test ordering.
+    """
     hermes_logging._logging_initialized = False
     root = logging.getLogger()
-    original_handlers = list(root.handlers)
+    # Strip ALL RotatingFileHandlers — not just the ones we added — so that
+    # handlers leaked from other test modules in the same xdist worker don't
+    # pollute our counts.
+    pre_existing = []
+    for h in list(root.handlers):
+        if isinstance(h, RotatingFileHandler):
+            root.removeHandler(h)
+            h.close()
+        else:
+            pre_existing.append(h)
     yield
     # Restore — remove any handlers added during the test.
     for h in list(root.handlers):
-        if h not in original_handlers:
+        if h not in pre_existing:
             root.removeHandler(h)
             h.close()
     hermes_logging._logging_initialized = False
diff --git a/tests/test_timezone.py b/tests/test_timezone.py
index 9848212ce..2d0216117 100644
--- a/tests/test_timezone.py
+++ b/tests/test_timezone.py
@@ -136,8 +136,11 @@ class TestCodeExecutionTZ:
     """Verify TZ env var is passed to sandboxed child process via real execute_code."""
 
     @pytest.fixture(autouse=True)
-    def _import_execute_code(self):
+    def _import_execute_code(self, monkeypatch):
         """Lazy-import execute_code to avoid pulling in firecrawl at collection time."""
+        # Force local backend — other tests in the same xdist worker may leak
+        # TERMINAL_ENV=modal/docker which causes modal.exception.AuthError.
+        monkeypatch.setenv("TERMINAL_ENV", "local")
         try:
             from tools.code_execution_tool import execute_code
             self._execute_code = execute_code
diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 9d6df27c6..085ffad29 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -15,9 +15,13 @@ Run with:  python -m pytest tests/test_code_execution.py -v
 import pytest
 # pytestmark removed — tests run fine (61 pass, ~99s)
 
-
 import json
 import os
+
+# Force local terminal backend for ALL tests in this file.
+# Under xdist, another test may leak TERMINAL_ENV=modal/docker, sending
+# execute_code down the remote path → modal.exception.AuthError.
+os.environ["TERMINAL_ENV"] = "local"
 import sys
 import time
 import threading
@@ -325,7 +329,7 @@ class TestStubSchemaDrift(unittest.TestCase):
     # Parameters that are internal (injected by the handler, not user-facing)
     _INTERNAL_PARAMS = {"task_id", "user_task"}
     # Parameters intentionally blocked in the sandbox
-    _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty"}
+    _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty", "notify_on_complete"}
 
     def test_stubs_cover_all_schema_params(self):
         """Every user-facing parameter in the real schema must appear in the
diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py
index a20d23fcb..c1e615bde 100644
--- a/tests/tools/test_skill_manager_tool.py
+++ b/tests/tools/test_skill_manager_tool.py
@@ -1,6 +1,7 @@
 """Tests for tools/skill_manager_tool.py — skill creation, editing, and deletion."""
 
 import json
+from contextlib import contextmanager
 from pathlib import Path
 from unittest.mock import patch
 
@@ -24,6 +25,15 @@ from tools.skill_manager_tool import (
 )
 
 
+@contextmanager
+def _skill_dir(tmp_path):
+    """Patch both SKILLS_DIR and get_all_skills_dirs so _find_skill searches
+    only the temp directory — not the real ~/.hermes/skills/."""
+    with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path), \
+         patch("agent.skill_utils.get_all_skills_dirs", return_value=[tmp_path]):
+        yield
+
+
 VALID_SKILL_CONTENT = """\
 ---
 name: test-skill
@@ -179,32 +189,32 @@ class TestValidateFilePath:
 
 class TestCreateSkill:
     def test_create_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _create_skill("my-skill", VALID_SKILL_CONTENT)
         assert result["success"] is True
         assert (tmp_path / "my-skill" / "SKILL.md").exists()
 
     def test_create_with_category(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _create_skill("my-skill", VALID_SKILL_CONTENT, category="devops")
         assert result["success"] is True
         assert (tmp_path / "devops" / "my-skill" / "SKILL.md").exists()
         assert result["category"] == "devops"
 
     def test_create_duplicate_blocked(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _create_skill("my-skill", VALID_SKILL_CONTENT)
         assert result["success"] is False
         assert "already exists" in result["error"]
 
     def test_create_invalid_name(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _create_skill("Invalid Name!", VALID_SKILL_CONTENT)
         assert result["success"] is False
 
     def test_create_invalid_content(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _create_skill("my-skill", "no frontmatter here")
         assert result["success"] is False
 
@@ -212,7 +222,8 @@ class TestCreateSkill:
         skills_dir = tmp_path / "skills"
         skills_dir.mkdir()
 
-        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir):
+        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir), \
+             patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_dir]):
             result = _create_skill("my-skill", VALID_SKILL_CONTENT, category="../escape")
 
         assert result["success"] is False
@@ -224,7 +235,8 @@ class TestCreateSkill:
         skills_dir.mkdir()
         outside = tmp_path / "outside"
 
-        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir):
+        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir), \
+             patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_dir]):
             result = _create_skill("my-skill", VALID_SKILL_CONTENT, category=str(outside))
 
         assert result["success"] is False
@@ -234,7 +246,7 @@ class TestCreateSkill:
 
 class TestEditSkill:
     def test_edit_existing_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _edit_skill("my-skill", VALID_SKILL_CONTENT_2)
         assert result["success"] is True
@@ -242,13 +254,13 @@ class TestEditSkill:
         assert "Updated description" in content
 
     def test_edit_nonexistent_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _edit_skill("nonexistent", VALID_SKILL_CONTENT)
         assert result["success"] is False
         assert "not found" in result["error"]
 
     def test_edit_invalid_content_rejected(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _edit_skill("my-skill", "no frontmatter")
         assert result["success"] is False
@@ -259,7 +271,7 @@ class TestEditSkill:
 
 class TestPatchSkill:
     def test_patch_unique_match(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _patch_skill("my-skill", "Do the thing.", "Do the new thing.")
         assert result["success"] is True
@@ -267,7 +279,7 @@ class TestPatchSkill:
         assert "Do the new thing." in content
 
     def test_patch_nonexistent_string(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _patch_skill("my-skill", "this text does not exist", "replacement")
         assert result["success"] is False
@@ -284,7 +296,7 @@ description: A test skill.
 
 word word
 """
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", content)
             result = _patch_skill("my-skill", "word", "replaced")
         assert result["success"] is False
@@ -301,39 +313,39 @@ description: A test skill.
 
 word word
 """
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", content)
             result = _patch_skill("my-skill", "word", "replaced", replace_all=True)
         assert result["success"] is True
 
     def test_patch_supporting_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             _write_file("my-skill", "references/api.md", "old text here")
             result = _patch_skill("my-skill", "old text", "new text", file_path="references/api.md")
         assert result["success"] is True
 
     def test_patch_skill_not_found(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _patch_skill("nonexistent", "old", "new")
         assert result["success"] is False
 
 
 class TestDeleteSkill:
     def test_delete_existing(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _delete_skill("my-skill")
         assert result["success"] is True
         assert not (tmp_path / "my-skill").exists()
 
     def test_delete_nonexistent(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _delete_skill("nonexistent")
         assert result["success"] is False
 
     def test_delete_cleans_empty_category_dir(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT, category="devops")
             _delete_skill("my-skill")
         assert not (tmp_path / "devops").exists()
@@ -346,19 +358,19 @@ class TestDeleteSkill:
 
 class TestWriteFile:
     def test_write_reference_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _write_file("my-skill", "references/api.md", "# API\nEndpoint docs.")
         assert result["success"] is True
         assert (tmp_path / "my-skill" / "references" / "api.md").exists()
 
     def test_write_to_nonexistent_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             result = _write_file("nonexistent", "references/doc.md", "content")
         assert result["success"] is False
 
     def test_write_to_disallowed_path(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _write_file("my-skill", "secret/evil.py", "malicious")
         assert result["success"] is False
@@ -366,7 +378,7 @@ class TestWriteFile:
 
 class TestRemoveFile:
     def test_remove_existing_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             _write_file("my-skill", "references/api.md", "content")
             result = _remove_file("my-skill", "references/api.md")
@@ -374,7 +386,7 @@ class TestRemoveFile:
         assert not (tmp_path / "my-skill" / "references" / "api.md").exists()
 
     def test_remove_nonexistent_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             _create_skill("my-skill", VALID_SKILL_CONTENT)
             result = _remove_file("my-skill", "references/nope.md")
         assert result["success"] is False
@@ -387,27 +399,27 @@ class TestRemoveFile:
 
 class TestSkillManageDispatcher:
     def test_unknown_action(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             raw = skill_manage(action="explode", name="test")
         result = json.loads(raw)
         assert result["success"] is False
         assert "Unknown action" in result["error"]
 
     def test_create_without_content(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             raw = skill_manage(action="create", name="test")
         result = json.loads(raw)
         assert result["success"] is False
         assert "content" in result["error"].lower()
 
     def test_patch_without_old_string(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             raw = skill_manage(action="patch", name="test")
         result = json.loads(raw)
         assert result["success"] is False
 
     def test_full_create_via_dispatcher(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
             raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT)
         result = json.loads(raw)
         assert result["success"] is True