diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 94587fccedd..073d0d8510e 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1450,9 +1450,19 @@ class TestRunJobConfigLogging: "prompt": "hello", } + # Mock heavy post-yaml work so the test only exercises the warning + # path. Without these mocks, _run_job_impl continues into provider + # resolution and MCP discovery, both of which can spawn subprocesses + # / hit the network and have caused this test to time out on CI + # (>30s wall clock) under load. See PR #33661 follow-up. with patch("cron.scheduler._hermes_home", tmp_path), \ patch("cron.scheduler._resolve_origin", return_value=None), \ patch("dotenv.load_dotenv"), \ + patch("hermes_cli.runtime_provider.resolve_runtime_provider", + return_value={"provider": "openrouter", "api_key": "x", + "base_url": "https://example.invalid", + "api_mode": "chat_completions"}), \ + patch("tools.mcp_tool.discover_mcp_tools", return_value=[]), \ patch("run_agent.AIAgent") as mock_agent_cls: mock_agent = MagicMock() mock_agent.run_conversation.return_value = {"final_response": "ok"} @@ -1482,6 +1492,11 @@ class TestRunJobConfigLogging: with patch("cron.scheduler._hermes_home", tmp_path), \ patch("cron.scheduler._resolve_origin", return_value=None), \ patch("dotenv.load_dotenv"), \ + patch("hermes_cli.runtime_provider.resolve_runtime_provider", + return_value={"provider": "openrouter", "api_key": "x", + "base_url": "https://example.invalid", + "api_mode": "chat_completions"}), \ + patch("tools.mcp_tool.discover_mcp_tools", return_value=[]), \ patch("run_agent.AIAgent") as mock_agent_cls: mock_agent = MagicMock() mock_agent.run_conversation.return_value = {"final_response": "ok"} diff --git a/tests/tools/test_browser_supervisor.py b/tests/tools/test_browser_supervisor.py index 179a94506ed..8d844cfefc6 100644 --- a/tests/tools/test_browser_supervisor.py +++ b/tests/tools/test_browser_supervisor.py @@ -89,18 +89,45 @@ def chrome_cdp(request): except Exception: time.sleep(0.25) if ws_url is None: - proc.terminate() - proc.wait(timeout=5) + try: + proc.terminate() + proc.wait(timeout=5) + except (subprocess.TimeoutExpired, AssertionError, Exception): + try: + proc.kill() + except Exception: + pass + try: + proc.wait(timeout=2) + except (AssertionError, Exception): + pass shutil.rmtree(profile, ignore_errors=True) pytest.skip("Chrome didn't expose CDP in time") yield ws_url, port - proc.terminate() + # Tear down Chrome. The stdlib `subprocess._wait()` POSIX implementation + # has a known race (https://bugs.python.org/issue38630): when SIGCHLD + # arrives concurrently with `proc.wait()`, `_try_wait(WNOHANG)` can + # return a foreign pid and the `assert pid == self.pid or pid == 0` + # fires. We saw this in CI on slice 1 after this fixture's teardown + # (PR #33661 follow-up). Swallow the stdlib race + force-kill if wait + # hangs, then always reap so we don't leak a zombie. + try: + proc.terminate() + except Exception: + pass try: proc.wait(timeout=3) - except Exception: - proc.kill() + except (subprocess.TimeoutExpired, AssertionError, Exception): + try: + proc.kill() + except Exception: + pass + try: + proc.wait(timeout=2) + except (AssertionError, Exception): + pass shutil.rmtree(profile, ignore_errors=True)