fix(tests): resolve 17 persistent CI test failures (#15084)

Make the main-branch test suite pass again. Most failures were tests still asserting old shapes after recent refactors; two were real source bugs. Source fixes: - tools/mcp_tool.py: _kill_orphaned_mcp_children() slept 2s on every shutdown even when no tracked PIDs existed, making test_shutdown_is_parallel measure ~3s for 3 parallel 1s shutdowns. Early-return when pids is empty. - hermes_cli/tips.py: tip 105 was 157 chars; corpus max is 150. Test fixes (mostly stale mock targets / missing fixture fields): - test_zombie_process_cleanup, test_agent_cache: patch run_agent.cleanup_vm (the local name bound at import), not tools.terminal_tool.cleanup_vm. - test_browser_camofox: patch tools.browser_camofox.load_config, not hermes_cli.config.load_config (the source module, not the resolved one). - test_flush_memories_codex._chat_response_with_memory_call: add finish_reason, tool_call.id, tool_call.type so the chat_completions transport normalizer doesn't AttributeError. - test_concurrent_interrupt: polling_tool signature now accepts messages= kwarg that _invoke_tool() passes through. - test_minimax_provider: add _fallback_chain=[] to the __new__'d agent so switch_model() doesn't AttributeError. - test_skills_config: SKILLS_DIR MagicMock + .rglob stopped working after the scanner switched to agent.skill_utils.iter_skill_index_files (os.walk-based). Point SKILLS_DIR at a real tmp_path and patch agent.skill_utils.get_external_skills_dirs. - test_browser_cdp_tool: browser_cdp toolset was intentionally split into 'browser-cdp' (commit 96b0f3700) so its stricter check_fn doesn't gate the whole browser toolset; test now expects 'browser-cdp'. - test_registry: add tools.browser_dialog_tool to the expected builtin-discovery set (PR #14540 added it). - test_file_tools TestPatchHints: patch_tool surfaces hints as a '_hint' key on the JSON payload, not inline '[Hint: ...' text. - test_write_deny test_hermes_env: resolve .env via get_hermes_home() so the path matches the profile-aware denylist under hermetic HERMES_HOME. - test_checkpoint_manager test_falls_back_to_parent: guard the walk-up so a stray /tmp/pyproject.toml on the host doesn't pick up /tmp as the project root. - test_quick_commands: set cli.session_id in the __new__'d CLI so the alias-args path doesn't trip AttributeError when fuzzy-matching leaks a skill command across xdist test distribution.
2026-04-25 00:51:20 +00:00 · 2026-04-24 03:46:46 -07:00 · 2026-04-24 03:46:46 -07:00 · 18f3fc8a6f
commit 18f3fc8a6f
parent 1f9c368622
15 changed files with 83 additions and 29 deletions
--- a/hermes_cli/tips.py
+++ b/hermes_cli/tips.py
@ -127,7 +127,7 @@ TIPS = [
    # --- Tools & Capabilities ---
    "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
-    "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
+    "delegate_task spawns up to 3 concurrent sub-agents by default (delegation.max_concurrent_children) with isolated contexts for parallel work.",
    "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
    "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
    "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
--- a/tests/agent/test_minimax_provider.py
+++ b/tests/agent/test_minimax_provider.py
@ -341,6 +341,7 @@ class TestMinimaxSwitchModelCredentialGuard:
            agent._client_kwargs = {}
            agent.client = None
            agent._anthropic_client = MagicMock()
            agent._fallback_chain = []
        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
             patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-leaked") as mock_resolve, \
--- a/tests/cli/test_quick_commands.py
+++ b/tests/cli/test_quick_commands.py
@ -23,6 +23,11 @@ class TestCLIQuickCommands:
        cli.console = MagicMock()
        cli.agent = None
        cli.conversation_history = []
        # session_id is accessed by the fallback skill/fuzzy-match path in
        # process_command; without it, tests that exercise `/alias args`
        # can trip an AttributeError when cross-test state leaks a skill
        # command matching the alias target.
        cli.session_id = "test-session"
        return cli
    def test_exec_command_runs_and_prints_output(self):
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@ -950,7 +950,7 @@ class TestAgentCacheIdleResume:
        release_clients() (soft — session may resume).
        """
        from run_agent import AIAgent
-        from tools import terminal_tool as _tt
+        import run_agent as _ra
        # Agent A: evicted from cache (soft) — terminal survives.
        # Agent B: session expired (hard) — terminal torn down.
@ -970,13 +970,16 @@ class TestAgentCacheIdleResume:
        )
        vm_calls: list = []
-        original_vm = _tt.cleanup_vm
+        # AIAgent.close() calls the ``cleanup_vm`` name bound into
-        _tt.cleanup_vm = lambda tid: vm_calls.append(tid)
+        # ``run_agent`` at import time, not ``tools.terminal_tool.cleanup_vm``
        # directly — so patch the ``run_agent`` reference.
        original_vm = _ra.cleanup_vm
        _ra.cleanup_vm = lambda tid: vm_calls.append(tid)
        try:
            agent_a.release_clients()   # cache eviction
            agent_b.close()              # session expiry
        finally:
-            _tt.cleanup_vm = original_vm
+            _ra.cleanup_vm = original_vm
            try:
                agent_a.close()
            except Exception:
--- a/tests/hermes_cli/test_skills_config.py
+++ b/tests/hermes_cli/test_skills_config.py
@ -251,43 +251,48 @@ class TestGetDisabledSkillNames:
 class TestFindAllSkillsFiltering:
    @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"})
    @patch("tools.skills_tool.skill_matches_platform", return_value=True)
-    @patch("tools.skills_tool.SKILLS_DIR")
+    def test_disabled_skill_excluded(self, mock_platform, mock_disabled, tmp_path, monkeypatch):
    def test_disabled_skill_excluded(self, mock_dir, mock_platform, mock_disabled, tmp_path):
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        skill_md = skill_dir / "SKILL.md"
        skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
-        mock_dir.exists.return_value = True
+        # Point SKILLS_DIR at the real tempdir so iter_skill_index_files
-        mock_dir.rglob.return_value = [skill_md]
+        # (which uses os.walk) can actually find the file.
        import tools.skills_tool as _st
        import agent.skill_utils as _su
        monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path)
        monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: [])
        from tools.skills_tool import _find_all_skills
        skills = _find_all_skills()
        assert not any(s["name"] == "my-skill" for s in skills)
    @patch("tools.skills_tool._get_disabled_skill_names", return_value=set())
    @patch("tools.skills_tool.skill_matches_platform", return_value=True)
-    @patch("tools.skills_tool.SKILLS_DIR")
+    def test_enabled_skill_included(self, mock_platform, mock_disabled, tmp_path, monkeypatch):
    def test_enabled_skill_included(self, mock_dir, mock_platform, mock_disabled, tmp_path):
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        skill_md = skill_dir / "SKILL.md"
        skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
-        mock_dir.exists.return_value = True
+        import tools.skills_tool as _st
-        mock_dir.rglob.return_value = [skill_md]
+        import agent.skill_utils as _su
        monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path)
        monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: [])
        from tools.skills_tool import _find_all_skills
        skills = _find_all_skills()
        assert any(s["name"] == "my-skill" for s in skills)
    @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"})
    @patch("tools.skills_tool.skill_matches_platform", return_value=True)
-    @patch("tools.skills_tool.SKILLS_DIR")
+    def test_skip_disabled_returns_all(self, mock_platform, mock_disabled, tmp_path, monkeypatch):
    def test_skip_disabled_returns_all(self, mock_dir, mock_platform, mock_disabled, tmp_path):
        """skip_disabled=True ignores the disabled set (for config UI)."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        skill_md = skill_dir / "SKILL.md"
        skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
-        mock_dir.exists.return_value = True
+        import tools.skills_tool as _st
-        mock_dir.rglob.return_value = [skill_md]
+        import agent.skill_utils as _su
        monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path)
        monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: [])
        from tools.skills_tool import _find_all_skills
        skills = _find_all_skills(skip_disabled=True)
        assert any(s["name"] == "my-skill" for s in skills)
--- a/tests/run_agent/test_concurrent_interrupt.py
+++ b/tests/run_agent/test_concurrent_interrupt.py
@ -184,7 +184,7 @@ def test_running_concurrent_worker_sees_is_interrupted(monkeypatch):
    observed = {"saw_true": False, "poll_count": 0, "worker_tid": None}
    worker_started = threading.Event()
-    def polling_tool(name, args, task_id, call_id=None):
+    def polling_tool(name, args, task_id, call_id=None, messages=None):
        observed["worker_tid"] = threading.current_thread().ident
        worker_started.set()
        deadline = time.monotonic() + 5.0
--- a/tests/run_agent/test_flush_memories_codex.py
+++ b/tests/run_agent/test_flush_memories_codex.py
@ -73,9 +73,12 @@ def _chat_response_with_memory_call():
    """Simulated chat completions response with a memory tool call."""
    return SimpleNamespace(
        choices=[SimpleNamespace(
            finish_reason="tool_calls",
            message=SimpleNamespace(
                content=None,
                tool_calls=[SimpleNamespace(
                    id="call_mem_0",
                    type="function",
                    function=SimpleNamespace(
                        name="memory",
                        arguments=json.dumps({
--- a/tests/tools/test_browser_camofox.py
+++ b/tests/tools/test_browser_camofox.py
@ -283,7 +283,7 @@ class TestCamofoxVisionConfig:
        with (
            patch("tools.browser_camofox.open", create=True) as mock_open,
            patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm,
-            patch("hermes_cli.config.load_config", return_value={"auxiliary": {"vision": {"temperature": 1, "timeout": 45}}}),
+            patch("tools.browser_camofox.load_config", return_value={"auxiliary": {"vision": {"temperature": 1, "timeout": 45}}}),
        ):
            mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng"
            result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t11"))
@ -315,7 +315,7 @@ class TestCamofoxVisionConfig:
        with (
            patch("tools.browser_camofox.open", create=True) as mock_open,
            patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm,
-            patch("hermes_cli.config.load_config", return_value={"auxiliary": {"vision": {}}}),
+            patch("tools.browser_camofox.load_config", return_value={"auxiliary": {"vision": {}}}),
        ):
            mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng"
            result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t12"))
--- a/tests/tools/test_browser_cdp_tool.py
+++ b/tests/tools/test_browser_cdp_tool.py
@ -351,7 +351,10 @@ def test_registered_in_browser_toolset():
    entry = registry.get_entry("browser_cdp")
    assert entry is not None
-    assert entry.toolset == "browser"
+    # browser_cdp lives in its own toolset so its stricter check_fn
    # (requires reachable CDP endpoint) doesn't gate the whole browser
    # toolset — see commit 96b0f3700.
    assert entry.toolset == "browser-cdp"
    assert entry.schema["name"] == "browser_cdp"
    assert entry.schema["parameters"]["required"] == ["method"]
    assert "Chrome DevTools Protocol" in entry.schema["description"]
--- a/tests/tools/test_checkpoint_manager.py
+++ b/tests/tools/test_checkpoint_manager.py
@ -357,12 +357,33 @@ class TestWorkingDirResolution:
        result = mgr.get_working_dir_for_path(str(subdir / "file.py"))
        assert result == str(project)
-    def test_falls_back_to_parent(self, tmp_path):
+    def test_falls_back_to_parent(self, tmp_path, monkeypatch):
        mgr = CheckpointManager(enabled=True)
        filepath = tmp_path / "random" / "file.py"
        filepath.parent.mkdir(parents=True)
        filepath.write_text("x\\n")
        # The walk-up scan for project markers (.git, pyproject.toml, etc.)
        # stops at tmp_path — otherwise stray markers in ``/tmp`` (e.g.
        # ``/tmp/pyproject.toml`` left by other tools on the host) get
        # picked up as the project root and this test flakes on shared CI.
        import pathlib as _pl
        _real_exists = _pl.Path.exists
        def _guarded_exists(self):
            s = str(self)
            stop = str(tmp_path)
            if not s.startswith(stop) and any(
                s.endswith("/" + m) or s == "/" + m
                for m in (".git", "pyproject.toml", "package.json",
                          "Cargo.toml", "go.mod", "Makefile", "pom.xml",
                          ".hg", "Gemfile")
            ):
                return False
            return _real_exists(self)
        monkeypatch.setattr(_pl.Path, "exists", _guarded_exists)
        result = mgr.get_working_dir_for_path(str(filepath))
        assert result == str(filepath.parent)
--- a/tests/tools/test_file_tools.py
+++ b/tests/tools/test_file_tools.py
@ -247,7 +247,9 @@ class TestPatchHints:
        from tools.file_tools import patch_tool
        raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y")
-        assert "[Hint:" in raw
+        # patch_tool surfaces the hint as a structured "_hint" field on the
        # JSON error payload (not an inline "[Hint: ..." tail).
        assert "_hint" in raw
        assert "read_file" in raw
    @patch("tools.file_tools._get_file_ops")
@ -260,7 +262,7 @@ class TestPatchHints:
        from tools.file_tools import patch_tool
        raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y")
-        assert "[Hint:" not in raw
+        assert "_hint" not in raw
 class TestSearchHints:
--- a/tests/tools/test_registry.py
+++ b/tests/tools/test_registry.py
@ -292,6 +292,7 @@ class TestBuiltinDiscovery:
    def test_matches_previous_manual_builtin_tool_set(self):
        expected = {
            "tools.browser_cdp_tool",
            "tools.browser_dialog_tool",
            "tools.browser_tool",
            "tools.clarify_tool",
            "tools.code_execution_tool",
--- a/tests/tools/test_write_deny.py
+++ b/tests/tools/test_write_deny.py
@ -33,7 +33,12 @@ class TestWriteDenyExactPaths:
        assert _is_write_denied(path) is True
    def test_hermes_env(self):
-        path = os.path.join(str(Path.home()), ".hermes", ".env")
+        # ``.env`` under the active HERMES_HOME (profile-aware, not just
        # ``~/.hermes``) must be write-denied. The hermetic test conftest
        # points HERMES_HOME at a tempdir — resolve via get_hermes_home()
        # to match the denylist.
        from hermes_constants import get_hermes_home
        path = str(get_hermes_home() / ".env")
        assert _is_write_denied(path) is True
    def test_shell_profiles(self):
--- a/tests/tools/test_zombie_process_cleanup.py
+++ b/tests/tools/test_zombie_process_cleanup.py
@ -110,8 +110,8 @@ class TestAgentCloseMethod:
            agent.client = None
            with patch("tools.process_registry.process_registry") as mock_registry, \
-                 patch("tools.terminal_tool.cleanup_vm") as mock_cleanup_vm, \
+                 patch("run_agent.cleanup_vm") as mock_cleanup_vm, \
-                 patch("tools.browser_tool.cleanup_browser") as mock_cleanup_browser:
+                 patch("run_agent.cleanup_browser") as mock_cleanup_browser:
                agent.close()
                mock_registry.kill_all.assert_called_once_with(
@ -172,9 +172,9 @@ class TestAgentCloseMethod:
            with patch(
                "tools.process_registry.process_registry"
            ) as mock_reg, patch(
-                "tools.terminal_tool.cleanup_vm"
+                "run_agent.cleanup_vm"
            ) as mock_vm, patch(
-                "tools.browser_tool.cleanup_browser"
+                "run_agent.cleanup_browser"
            ) as mock_browser:
                mock_reg.kill_all.side_effect = RuntimeError("boom")
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@ -2809,6 +2809,11 @@ def _kill_orphaned_mcp_children() -> None:
        pids = dict(_stdio_pids)
        _stdio_pids.clear()
    # Fast path: no tracked stdio PIDs to reap. Skip the SIGTERM/sleep/SIGKILL
    # dance entirely — otherwise every MCP-free shutdown pays a 2s sleep tax.
    if not pids:
        return
    # Phase 1: SIGTERM (graceful)
    for pid, server_name in pids.items():
        try: