fix: repair 57 failing CI tests across 14 files (#5823)

* fix: repair 57 failing CI tests across 14 files Categories of fixes: **Test isolation under xdist (-n auto):** - test_hermes_logging: Strip ALL RotatingFileHandlers before each test to prevent handlers leaked from other xdist workers from polluting counts - test_code_execution: Force TERMINAL_ENV=local in setUp — prevents Modal AuthError when another test leaks TERMINAL_ENV=modal - test_timezone: Same TERMINAL_ENV fix for execute_code timezone tests - test_codex_execution_paths: Mock _resolve_turn_agent_config to ensure model resolution works regardless of xdist worker state **Matrix adapter tests (nio not installed in CI):** - Add _make_fake_nio() helper with real response classes for isinstance() checks in production code - Replace MagicMock(spec=nio.XxxResponse) with fake_nio instances - Wrap production method calls with patch.dict('sys.modules', {'nio': ...}) so import nio succeeds in method bodies - Use try/except instead of pytest.importorskip for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules) - test_matrix_voice: Skip entire file if nio is a mock, not just missing **Stale test expectations:** - test_cli_provider_resolution: _prompt_provider_choice now takes **kwargs (default param added); mock getpass.getpass alongside input - test_anthropic_oauth_flow: Mock getpass.getpass (code switched from input) - test_gemini_provider: Mock models.dev + OpenRouter API lookups to test hardcoded defaults without external API variance - test_code_execution: Add notify_on_complete to blocked terminal params - test_setup_openclaw_migration: Mock prompt_choice to select 'Full setup' (new quick-setup path leads to _require_tty → sys.exit in CI) - test_skill_manager_tool: Patch get_all_skills_dirs alongside SKILLS_DIR so _find_skill searches tmp_path, not real ~/.hermes/skills/ **Missing attributes in object.__new__ test runners:** - test_platform_reconnect: Add session_store to _make_runner() - test_session_race_guard: Add hooks, _running_agents_ts, session_store, delivery_router to _make_runner() **Production bug fix (gateway/run.py):** - Fix sentinel eviction race: _AGENT_PENDING_SENTINEL was immediately evicted by the stale-detection logic because sentinels have no get_activity_summary() method, causing _stale_idle=inf >= timeout. Guard _should_evict with 'is not _AGENT_PENDING_SENTINEL'. * fix: address remaining CI failures - test_setup_openclaw_migration: Also mock _offer_launch_chat (called at end of both quick and full setup paths) - test_code_execution: Move TERMINAL_ENV=local to module level to protect ALL test classes (TestEnvVarFiltering, TestExecuteCodeEdgeCases, TestInterruptHandling, TestHeadTailTruncation) from xdist env leaks - test_matrix: Use try/except for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules under xdist)
2026-04-25 00:51:20 +00:00 · 2026-04-07 09:58:45 -07:00 · 2026-04-07 09:58:45 -07:00 · caded0a5e7
commit caded0a5e7
parent f18a2aa634
14 changed files with 208 additions and 69 deletions
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@ -2,12 +2,54 @@
 import asyncio
 import json
 import re
+import sys
+import types
 import pytest
 from unittest.mock import MagicMock, patch, AsyncMock

 from gateway.config import Platform, PlatformConfig


+def _make_fake_nio():
+    """Create a lightweight fake ``nio`` module with real response classes.
+
+    Tests that call production methods doing ``import nio`` / ``isinstance(resp, nio.XxxResponse)``
+    need real classes (not MagicMock auto-attributes) to satisfy isinstance checks.
+    Use via ``patch.dict("sys.modules", {"nio": _make_fake_nio()})``.
+    """
+    mod = types.ModuleType("nio")
+
+    class RoomSendResponse:
+        def __init__(self, event_id="$fake"):
+            self.event_id = event_id
+
+    class RoomRedactResponse:
+        pass
+
+    class RoomCreateResponse:
+        def __init__(self, room_id="!fake:example.org"):
+            self.room_id = room_id
+
+    class RoomInviteResponse:
+        pass
+
+    class UploadResponse:
+        def __init__(self, content_uri="mxc://example.org/fake"):
+            self.content_uri = content_uri
+
+    # Minimal Api stub for code that checks nio.Api.RoomPreset
+    class _Api:
+        pass
+    mod.Api = _Api
+
+    mod.RoomSendResponse = RoomSendResponse
+    mod.RoomRedactResponse = RoomRedactResponse
+    mod.RoomCreateResponse = RoomCreateResponse
+    mod.RoomInviteResponse = RoomInviteResponse
+    mod.UploadResponse = UploadResponse
+    return mod
+
+
 # ---------------------------------------------------------------------------
 # Platform & Config
 # ---------------------------------------------------------------------------
@ -1450,7 +1492,10 @@ class TestMatrixEncryptedMedia:

    @pytest.mark.asyncio
    async def test_on_room_message_media_decrypts_encrypted_image_and_passes_local_path(self):
-        from nio.crypto.attachments import encrypt_attachment
+        try:
+            from nio.crypto.attachments import encrypt_attachment
+        except (ImportError, ModuleNotFoundError):
+            pytest.skip("matrix-nio[e2e] required for encryption tests")

        adapter = _make_adapter()
        adapter._user_id = "@bot:example.org"
@ -1518,7 +1563,10 @@ class TestMatrixEncryptedMedia:

    @pytest.mark.asyncio
    async def test_on_room_message_media_decrypts_encrypted_voice_and_caches_audio(self):
-        from nio.crypto.attachments import encrypt_attachment
+        try:
+            from nio.crypto.attachments import encrypt_attachment
+        except (ImportError, ModuleNotFoundError):
+            pytest.skip("matrix-nio[e2e] required for encryption tests")

        adapter = _make_adapter()
        adapter._user_id = "@bot:example.org"
@ -1587,7 +1635,10 @@ class TestMatrixEncryptedMedia:

    @pytest.mark.asyncio
    async def test_on_room_message_media_decrypts_encrypted_file_and_caches_document(self):
-        from nio.crypto.attachments import encrypt_attachment
+        try:
+            from nio.crypto.attachments import encrypt_attachment
+        except (ImportError, ModuleNotFoundError):
+            pytest.skip("matrix-nio[e2e] required for encryption tests")

        adapter = _make_adapter()
        adapter._user_id = "@bot:example.org"
@ -1883,14 +1934,15 @@ class TestMatrixReactions:
    @pytest.mark.asyncio
    async def test_send_reaction(self):
        """_send_reaction should call room_send with m.reaction."""
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
        mock_client = MagicMock()
        mock_client.room_send = AsyncMock(
-            return_value=MagicMock(spec=nio.RoomSendResponse)
+            return_value=fake_nio.RoomSendResponse("$reaction1")
        )
        self.adapter._client = mock_client

-        result = await self.adapter._send_reaction("!room:ex", "$event1", "👍")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter._send_reaction("!room:ex", "$event1", "👍")
        assert result is True
        mock_client.room_send.assert_called_once()
        args = mock_client.room_send.call_args
@ -1902,7 +1954,8 @@ class TestMatrixReactions:
    @pytest.mark.asyncio
    async def test_send_reaction_no_client(self):
        self.adapter._client = None
-        result = await self.adapter._send_reaction("!room:ex", "$ev", "👍")
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter._send_reaction("!room:ex", "$ev", "👍")
        assert result is False

    @pytest.mark.asyncio
@ -1999,21 +2052,23 @@ class TestMatrixRedaction:

    @pytest.mark.asyncio
    async def test_redact_message(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
        mock_client = MagicMock()
        mock_client.room_redact = AsyncMock(
-            return_value=MagicMock(spec=nio.RoomRedactResponse)
+            return_value=fake_nio.RoomRedactResponse()
        )
        self.adapter._client = mock_client

-        result = await self.adapter.redact_message("!room:ex", "$ev1", "oops")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.redact_message("!room:ex", "$ev1", "oops")
        assert result is True
        mock_client.room_redact.assert_called_once()

    @pytest.mark.asyncio
    async def test_redact_no_client(self):
        self.adapter._client = None
-        result = await self.adapter.redact_message("!room:ex", "$ev1")
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter.redact_message("!room:ex", "$ev1")
        assert result is False


@ -2027,33 +2082,35 @@ class TestMatrixRoomManagement:

    @pytest.mark.asyncio
    async def test_create_room(self):
-        nio = pytest.importorskip("nio")
-        mock_resp = MagicMock(spec=nio.RoomCreateResponse)
-        mock_resp.room_id = "!new:example.org"
+        fake_nio = _make_fake_nio()
+        mock_resp = fake_nio.RoomCreateResponse(room_id="!new:example.org")
        mock_client = MagicMock()
        mock_client.room_create = AsyncMock(return_value=mock_resp)
        self.adapter._client = mock_client

-        room_id = await self.adapter.create_room(name="Test Room", topic="A test")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            room_id = await self.adapter.create_room(name="Test Room", topic="A test")
        assert room_id == "!new:example.org"
        assert "!new:example.org" in self.adapter._joined_rooms

    @pytest.mark.asyncio
    async def test_invite_user(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
        mock_client = MagicMock()
        mock_client.room_invite = AsyncMock(
-            return_value=MagicMock(spec=nio.RoomInviteResponse)
+            return_value=fake_nio.RoomInviteResponse()
        )
        self.adapter._client = mock_client

-        result = await self.adapter.invite_user("!room:ex", "@user:ex")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.invite_user("!room:ex", "@user:ex")
        assert result is True

    @pytest.mark.asyncio
    async def test_create_room_no_client(self):
        self.adapter._client = None
-        result = await self.adapter.create_room()
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter.create_room()
        assert result is None


@ -2099,28 +2156,28 @@ class TestMatrixMessageTypes:

    @pytest.mark.asyncio
    async def test_send_emote(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
        mock_client = MagicMock()
-        mock_resp = MagicMock(spec=nio.RoomSendResponse)
-        mock_resp.event_id = "$emote1"
+        mock_resp = fake_nio.RoomSendResponse(event_id="$emote1")
        mock_client.room_send = AsyncMock(return_value=mock_resp)
        self.adapter._client = mock_client

-        result = await self.adapter.send_emote("!room:ex", "waves hello")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.send_emote("!room:ex", "waves hello")
        assert result.success is True
        call_args = mock_client.room_send.call_args[0]
        assert call_args[2]["msgtype"] == "m.emote"

    @pytest.mark.asyncio
    async def test_send_notice(self):
-        nio = pytest.importorskip("nio")
+        fake_nio = _make_fake_nio()
        mock_client = MagicMock()
-        mock_resp = MagicMock(spec=nio.RoomSendResponse)
-        mock_resp.event_id = "$notice1"
+        mock_resp = fake_nio.RoomSendResponse(event_id="$notice1")
        mock_client.room_send = AsyncMock(return_value=mock_resp)
        self.adapter._client = mock_client

-        result = await self.adapter.send_notice("!room:ex", "System message")
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await self.adapter.send_notice("!room:ex", "System message")
        assert result.success is True
        call_args = mock_client.room_send.call_args[0]
        assert call_args[2]["msgtype"] == "m.notice"
@ -2128,5 +2185,6 @@ class TestMatrixMessageTypes:
    @pytest.mark.asyncio
    async def test_send_emote_empty_text(self):
        self.adapter._client = MagicMock()
-        result = await self.adapter.send_emote("!room:ex", "")
+        with patch.dict("sys.modules", {"nio": _make_fake_nio()}):
+            result = await self.adapter.send_emote("!room:ex", "")
        assert result.success is False
--- a/tests/gateway/test_matrix_voice.py
+++ b/tests/gateway/test_matrix_voice.py
@ -1,10 +1,18 @@
 """Tests for Matrix voice message support (MSC3245)."""
 import io
+import types

 import pytest
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch

-nio = pytest.importorskip("nio", reason="matrix-nio not installed")
+# Try importing real nio; skip entire file if not available.
+# A MagicMock in sys.modules (from another test) is not the real package.
+try:
+    import nio as _nio_probe
+    if not isinstance(_nio_probe, types.ModuleType) or not hasattr(_nio_probe, "__file__"):
+        pytest.skip("nio in sys.modules is a mock, not the real package", allow_module_level=True)
+except ImportError:
+    pytest.skip("matrix-nio not installed", allow_module_level=True)

 from gateway.platforms.base import MessageType

--- a/tests/gateway/test_platform_reconnect.py
+++ b/tests/gateway/test_platform_reconnect.py
@ -59,6 +59,7 @@ def _make_runner():
    runner._honcho_managers = {}
    runner._honcho_configs = {}
    runner._shutdown_all_gateway_honcho = lambda: None
+    runner.session_store = MagicMock()
    return runner


--- a/tests/gateway/test_session_race_guard.py
+++ b/tests/gateway/test_session_race_guard.py
@ -36,11 +36,16 @@ def _make_runner():
    )
    runner.adapters = {Platform.TELEGRAM: _FakeAdapter()}
    runner._running_agents = {}
+    runner._running_agents_ts = {}
    runner._pending_messages = {}
    runner._pending_approvals = {}
    runner._voice_mode = {}
    runner._background_tasks = set()
    runner._is_user_authorized = lambda _source: True
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    runner.session_store = MagicMock()
+    runner.delivery_router = MagicMock()
    return runner


--- a/tests/hermes_cli/test_setup_openclaw_migration.py
+++ b/tests/hermes_cli/test_setup_openclaw_migration.py
@ -184,6 +184,8 @@ class TestSetupWizardOpenclawIntegration:
            patch("hermes_cli.auth.get_active_provider", return_value=None),
            # User presses Enter to start
            patch("builtins.input", return_value=""),
+            # Select "Full setup" (index 1) so we exercise the full path
+            patch.object(setup_mod, "prompt_choice", return_value=1),
            # Mock the migration offer
            patch.object(
                setup_mod, "_offer_openclaw_migration", return_value=False
@ -196,6 +198,7 @@ class TestSetupWizardOpenclawIntegration:
            patch.object(setup_mod, "setup_tools"),
            patch.object(setup_mod, "save_config"),
            patch.object(setup_mod, "_print_setup_summary"),
+            patch.object(setup_mod, "_offer_launch_chat"),
        ):
            setup_mod.run_setup_wizard(args)

@ -218,6 +221,7 @@ class TestSetupWizardOpenclawIntegration:
            patch.object(setup_mod, "is_interactive_stdin", return_value=True),
            patch("hermes_cli.auth.get_active_provider", return_value=None),
            patch("builtins.input", return_value=""),
+            patch.object(setup_mod, "prompt_choice", return_value=1),
            patch.object(setup_mod, "_offer_openclaw_migration", return_value=True),
            patch.object(setup_mod, "setup_model_provider"),
            patch.object(setup_mod, "setup_terminal_backend"),
@ -226,6 +230,7 @@ class TestSetupWizardOpenclawIntegration:
            patch.object(setup_mod, "setup_tools"),
            patch.object(setup_mod, "save_config"),
            patch.object(setup_mod, "_print_setup_summary"),
+            patch.object(setup_mod, "_offer_launch_chat"),
        ):
            setup_mod.run_setup_wizard(args)

@ -249,6 +254,7 @@ class TestSetupWizardOpenclawIntegration:
            patch.object(setup_mod, "is_interactive_stdin", return_value=True),
            patch("hermes_cli.auth.get_active_provider", return_value=None),
            patch("builtins.input", return_value=""),
+            patch.object(setup_mod, "prompt_choice", return_value=1),
            patch.object(setup_mod, "_offer_openclaw_migration", return_value=True),
            patch.object(setup_mod, "setup_model_provider") as setup_model_provider,
            patch.object(setup_mod, "setup_terminal_backend"),
@ -257,6 +263,7 @@ class TestSetupWizardOpenclawIntegration:
            patch.object(setup_mod, "setup_tools"),
            patch.object(setup_mod, "save_config"),
            patch.object(setup_mod, "_print_setup_summary"),
+            patch.object(setup_mod, "_offer_launch_chat"),
        ):
            setup_mod.run_setup_wizard(args)

@ -438,6 +445,7 @@ class TestSetupWizardSkipsConfiguredSections:
            patch.object(setup_mod, "is_interactive_stdin", return_value=True),
            patch("hermes_cli.auth.get_active_provider", return_value=None),
            patch("builtins.input", return_value=""),
+            patch.object(setup_mod, "prompt_choice", return_value=1),
            # Migration succeeds and flips the env_side flag
            patch.object(
                setup_mod, "_offer_openclaw_migration",
--- a/tests/test_anthropic_oauth_flow.py
+++ b/tests/test_anthropic_oauth_flow.py
@ -40,6 +40,7 @@ def test_run_anthropic_oauth_flow_manual_token_still_persists(tmp_path, monkeypa
    monkeypatch.setattr("agent.anthropic_adapter.read_claude_code_credentials", lambda: None)
    monkeypatch.setattr("agent.anthropic_adapter.is_claude_code_token_valid", lambda creds: False)
    monkeypatch.setattr("builtins.input", lambda _prompt="": "sk-ant-oat01-manual-token")
+    monkeypatch.setattr("getpass.getpass", lambda _prompt="": "sk-ant-oat01-manual-token")

    from hermes_cli.main import _run_anthropic_oauth_flow

--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@ -538,7 +538,7 @@ def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):
        return "openrouter"

    monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider)
-    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1)
+    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices, **kwargs: len(choices) - 1)
    monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})())

    hermes_main.cmd_model(SimpleNamespace())
@ -579,6 +579,7 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
    # "Use this model? [Y/n]:" — confirm with Enter, then context length.
    answers = iter(["http://localhost:8000", "local-key", "", ""])
    monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
+    monkeypatch.setattr("getpass.getpass", lambda _prompt="": next(answers))

    hermes_main._model_flow_custom({})
    output = capsys.readouterr().out
@ -601,7 +602,7 @@ def test_cmd_model_forwards_nous_login_tls_options(monkeypatch):
    monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None)
    monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda requested, **kwargs: "nous")
    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider_id: None)
-    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: 0)
+    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices, **kwargs: 0)

    captured = {}

--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@ -152,11 +152,22 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
    runner._provider_routing = {}
    runner._fallback_model = None
    runner._running_agents = {}
+    runner._smart_model_routing = {}
    from unittest.mock import MagicMock, AsyncMock
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    runner._session_db = None
+    # Ensure model resolution returns the codex model even if xdist
+    # leaked env vars cleared HERMES_MODEL.
+    monkeypatch.setattr(
+        gateway_run.GatewayRunner,
+        "_resolve_turn_agent_config",
+        lambda self, msg, model, runtime: {
+            "model": model or "gpt-5.3-codex",
+            "runtime": runtime,
+        },
+    )

    source = SessionSource(
        platform=Platform.LOCAL,
--- a/tests/test_gemini_provider.py
+++ b/tests/test_gemini_provider.py
@ -171,7 +171,11 @@ class TestGeminiModelNormalization:

 class TestGeminiContextLength:
    def test_gemma_4_31b_context(self):
-        ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
+        # Mock external API lookups to test against hardcoded defaults
+        # (models.dev and OpenRouter may return different values like 262144).
+        with patch("agent.models_dev.lookup_models_dev_context", return_value=None), \
+             patch("agent.model_metadata.fetch_model_metadata", return_value={}):
+            ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
        assert ctx == 256000

    def test_gemma_4_26b_context(self):
--- a/tests/test_hermes_logging.py
+++ b/tests/test_hermes_logging.py
@ -14,14 +14,29 @@ import hermes_logging
@pytest.fixture(autouse=True)
 def _reset_logging_state():
    """Reset the module-level sentinel and clean up root logger handlers
-    added by setup_logging() so tests don't leak state."""
+    added by setup_logging() so tests don't leak state.
+
+    Under xdist (-n auto) other test modules may have called setup_logging()
+    in the same worker process, leaving RotatingFileHandlers on the root
+    logger.  We strip ALL RotatingFileHandlers before each test so the count
+    assertions are stable regardless of test ordering.
+    """
    hermes_logging._logging_initialized = False
    root = logging.getLogger()
-    original_handlers = list(root.handlers)
+    # Strip ALL RotatingFileHandlers — not just the ones we added — so that
+    # handlers leaked from other test modules in the same xdist worker don't
+    # pollute our counts.
+    pre_existing = []
+    for h in list(root.handlers):
+        if isinstance(h, RotatingFileHandler):
+            root.removeHandler(h)
+            h.close()
+        else:
+            pre_existing.append(h)
    yield
    # Restore — remove any handlers added during the test.
    for h in list(root.handlers):
-        if h not in original_handlers:
+        if h not in pre_existing:
            root.removeHandler(h)
            h.close()
    hermes_logging._logging_initialized = False
--- a/tests/test_timezone.py
+++ b/tests/test_timezone.py
@ -136,8 +136,11 @@ class TestCodeExecutionTZ:
    """Verify TZ env var is passed to sandboxed child process via real execute_code."""

    @pytest.fixture(autouse=True)
-    def _import_execute_code(self):
+    def _import_execute_code(self, monkeypatch):
        """Lazy-import execute_code to avoid pulling in firecrawl at collection time."""
+        # Force local backend — other tests in the same xdist worker may leak
+        # TERMINAL_ENV=modal/docker which causes modal.exception.AuthError.
+        monkeypatch.setenv("TERMINAL_ENV", "local")
        try:
            from tools.code_execution_tool import execute_code
            self._execute_code = execute_code
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@ -15,9 +15,13 @@ Run with:  python -m pytest tests/test_code_execution.py -v
 import pytest
 # pytestmark removed — tests run fine (61 pass, ~99s)

-
 import json
 import os
+
+# Force local terminal backend for ALL tests in this file.
+# Under xdist, another test may leak TERMINAL_ENV=modal/docker, sending
+# execute_code down the remote path → modal.exception.AuthError.
+os.environ["TERMINAL_ENV"] = "local"
 import sys
 import time
 import threading
@ -325,7 +329,7 @@ class TestStubSchemaDrift(unittest.TestCase):
    # Parameters that are internal (injected by the handler, not user-facing)
    _INTERNAL_PARAMS = {"task_id", "user_task"}
    # Parameters intentionally blocked in the sandbox
-    _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty"}
+    _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty", "notify_on_complete"}

    def test_stubs_cover_all_schema_params(self):
        """Every user-facing parameter in the real schema must appear in the
--- a/tests/tools/test_skill_manager_tool.py
+++ b/tests/tools/test_skill_manager_tool.py
@ -1,6 +1,7 @@
 """Tests for tools/skill_manager_tool.py — skill creation, editing, and deletion."""

 import json
+from contextlib import contextmanager
 from pathlib import Path
 from unittest.mock import patch

@ -24,6 +25,15 @@ from tools.skill_manager_tool import (
 )


+@contextmanager
+def _skill_dir(tmp_path):
+    """Patch both SKILLS_DIR and get_all_skills_dirs so _find_skill searches
+    only the temp directory — not the real ~/.hermes/skills/."""
+    with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path), \
+         patch("agent.skill_utils.get_all_skills_dirs", return_value=[tmp_path]):
+        yield
+
+
 VALID_SKILL_CONTENT = """\
 ---
 name: test-skill
@ -179,32 +189,32 @@ class TestValidateFilePath:

 class TestCreateSkill:
    def test_create_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _create_skill("my-skill", VALID_SKILL_CONTENT)
        assert result["success"] is True
        assert (tmp_path / "my-skill" / "SKILL.md").exists()

    def test_create_with_category(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _create_skill("my-skill", VALID_SKILL_CONTENT, category="devops")
        assert result["success"] is True
        assert (tmp_path / "devops" / "my-skill" / "SKILL.md").exists()
        assert result["category"] == "devops"

    def test_create_duplicate_blocked(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _create_skill("my-skill", VALID_SKILL_CONTENT)
        assert result["success"] is False
        assert "already exists" in result["error"]

    def test_create_invalid_name(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _create_skill("Invalid Name!", VALID_SKILL_CONTENT)
        assert result["success"] is False

    def test_create_invalid_content(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _create_skill("my-skill", "no frontmatter here")
        assert result["success"] is False

@ -212,7 +222,8 @@ class TestCreateSkill:
        skills_dir = tmp_path / "skills"
        skills_dir.mkdir()

-        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir):
+        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir), \
+             patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_dir]):
            result = _create_skill("my-skill", VALID_SKILL_CONTENT, category="../escape")

        assert result["success"] is False
@ -224,7 +235,8 @@ class TestCreateSkill:
        skills_dir.mkdir()
        outside = tmp_path / "outside"

-        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir):
+        with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir), \
+             patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_dir]):
            result = _create_skill("my-skill", VALID_SKILL_CONTENT, category=str(outside))

        assert result["success"] is False
@ -234,7 +246,7 @@ class TestCreateSkill:

 class TestEditSkill:
    def test_edit_existing_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _edit_skill("my-skill", VALID_SKILL_CONTENT_2)
        assert result["success"] is True
@ -242,13 +254,13 @@ class TestEditSkill:
        assert "Updated description" in content

    def test_edit_nonexistent_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _edit_skill("nonexistent", VALID_SKILL_CONTENT)
        assert result["success"] is False
        assert "not found" in result["error"]

    def test_edit_invalid_content_rejected(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _edit_skill("my-skill", "no frontmatter")
        assert result["success"] is False
@ -259,7 +271,7 @@ class TestEditSkill:

 class TestPatchSkill:
    def test_patch_unique_match(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _patch_skill("my-skill", "Do the thing.", "Do the new thing.")
        assert result["success"] is True
@ -267,7 +279,7 @@ class TestPatchSkill:
        assert "Do the new thing." in content

    def test_patch_nonexistent_string(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _patch_skill("my-skill", "this text does not exist", "replacement")
        assert result["success"] is False
@ -284,7 +296,7 @@ description: A test skill.

 word word
 """
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", content)
            result = _patch_skill("my-skill", "word", "replaced")
        assert result["success"] is False
@ -301,39 +313,39 @@ description: A test skill.

 word word
 """
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", content)
            result = _patch_skill("my-skill", "word", "replaced", replace_all=True)
        assert result["success"] is True

    def test_patch_supporting_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            _write_file("my-skill", "references/api.md", "old text here")
            result = _patch_skill("my-skill", "old text", "new text", file_path="references/api.md")
        assert result["success"] is True

    def test_patch_skill_not_found(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _patch_skill("nonexistent", "old", "new")
        assert result["success"] is False


 class TestDeleteSkill:
    def test_delete_existing(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _delete_skill("my-skill")
        assert result["success"] is True
        assert not (tmp_path / "my-skill").exists()

    def test_delete_nonexistent(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _delete_skill("nonexistent")
        assert result["success"] is False

    def test_delete_cleans_empty_category_dir(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT, category="devops")
            _delete_skill("my-skill")
        assert not (tmp_path / "devops").exists()
@ -346,19 +358,19 @@ class TestDeleteSkill:

 class TestWriteFile:
    def test_write_reference_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _write_file("my-skill", "references/api.md", "# API\nEndpoint docs.")
        assert result["success"] is True
        assert (tmp_path / "my-skill" / "references" / "api.md").exists()

    def test_write_to_nonexistent_skill(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            result = _write_file("nonexistent", "references/doc.md", "content")
        assert result["success"] is False

    def test_write_to_disallowed_path(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _write_file("my-skill", "secret/evil.py", "malicious")
        assert result["success"] is False
@ -366,7 +378,7 @@ class TestWriteFile:

 class TestRemoveFile:
    def test_remove_existing_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            _write_file("my-skill", "references/api.md", "content")
            result = _remove_file("my-skill", "references/api.md")
@ -374,7 +386,7 @@ class TestRemoveFile:
        assert not (tmp_path / "my-skill" / "references" / "api.md").exists()

    def test_remove_nonexistent_file(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            _create_skill("my-skill", VALID_SKILL_CONTENT)
            result = _remove_file("my-skill", "references/nope.md")
        assert result["success"] is False
@ -387,27 +399,27 @@ class TestRemoveFile:

 class TestSkillManageDispatcher:
    def test_unknown_action(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            raw = skill_manage(action="explode", name="test")
        result = json.loads(raw)
        assert result["success"] is False
        assert "Unknown action" in result["error"]

    def test_create_without_content(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            raw = skill_manage(action="create", name="test")
        result = json.loads(raw)
        assert result["success"] is False
        assert "content" in result["error"].lower()

    def test_patch_without_old_string(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            raw = skill_manage(action="patch", name="test")
        result = json.loads(raw)
        assert result["success"] is False

    def test_full_create_via_dispatcher(self, tmp_path):
-        with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path):
+        with _skill_dir(tmp_path):
            raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT)
        result = json.loads(raw)
        assert result["success"] is True