diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py index 831eff51f46..f438b637e28 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -530,6 +530,96 @@ class TestSpawnEnvSanitization: assert env.commands[2][0] == "cat '/path with spaces/hermes_bg.exit' 2>/dev/null" +# ========================================================================= +# Popen leak prevention +# ========================================================================= + +class TestPopenLeakOnSetupFailure: + """Regression for issue #2749: subprocess orphaned when post-Popen setup raises.""" + + def test_popen_killed_when_thread_creation_fails(self, registry): + """If Thread() raises after Popen, proc must be killed — not orphaned.""" + killed = [] + + proc = MagicMock() + proc.pid = 9999 + proc.stdout = iter([]) + proc.stdin = MagicMock() + proc.poll.return_value = None + + def fake_kill(): + killed.append(True) + + proc.kill = fake_kill + proc.wait = MagicMock() + + def boom(*args, **kwargs): + raise RuntimeError("Thread creation failed") + + with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \ + patch("subprocess.Popen", return_value=proc), \ + patch("threading.Thread", side_effect=boom), \ + patch.object(registry, "_write_checkpoint"): + with pytest.raises(RuntimeError, match="Thread creation failed"): + registry.spawn_local("echo hello", cwd="/tmp") + + assert killed, "proc.kill() must be called when post-Popen setup raises" + + def test_popen_killed_when_write_checkpoint_fails(self, registry): + """If _write_checkpoint raises after Popen, proc must still be killed.""" + killed = [] + + proc = MagicMock() + proc.pid = 8888 + proc.stdout = iter([]) + proc.stdin = MagicMock() + proc.poll.return_value = None + + def fake_kill(): + killed.append(True) + + proc.kill = fake_kill + proc.wait = MagicMock() + + fake_thread = MagicMock() + + with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \ + patch("subprocess.Popen", return_value=proc), \ + patch("threading.Thread", return_value=fake_thread), \ + patch.object(registry, "_write_checkpoint", side_effect=OSError("disk full")): + with pytest.raises(OSError, match="disk full"): + registry.spawn_local("echo hello", cwd="/tmp") + + assert killed, "proc.kill() must be called when _write_checkpoint raises" + + def test_popen_not_killed_on_success(self, registry): + """Successful spawn must NOT kill the process.""" + killed = [] + + proc = MagicMock() + proc.pid = 7777 + proc.stdout = iter([]) + proc.stdin = MagicMock() + proc.poll.return_value = None + + def fake_kill(): + killed.append(True) + + proc.kill = fake_kill + proc.wait = MagicMock() + + fake_thread = MagicMock() + + with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \ + patch("subprocess.Popen", return_value=proc), \ + patch("threading.Thread", return_value=fake_thread), \ + patch.object(registry, "_write_checkpoint"): + session = registry.spawn_local("echo hello", cwd="/tmp") + + assert not killed, "proc.kill() must NOT be called on successful spawn" + assert session.pid == 7777 + + # ========================================================================= # Checkpoint # ========================================================================= diff --git a/tools/process_registry.py b/tools/process_registry.py index d4c602bb4ce..260ba4739fd 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -562,21 +562,42 @@ class ProcessRegistry: session.process = proc session.pid = proc.pid - # Start output reader thread - reader = threading.Thread( - target=self._reader_loop, - args=(session,), - daemon=True, - name=f"proc-reader-{session.id}", - ) - session._reader_thread = reader - reader.start() + try: + # Start output reader thread + reader = threading.Thread( + target=self._reader_loop, + args=(session,), + daemon=True, + name=f"proc-reader-{session.id}", + ) + session._reader_thread = reader + reader.start() - with self._lock: - self._prune_if_needed() - self._running[session.id] = session + with self._lock: + self._prune_if_needed() + self._running[session.id] = session + + self._write_checkpoint() + except Exception: + # Post-Popen setup failed — kill the orphaned subprocess (and any + # descendants spawned via setsid) before re-raising so they do not + # leak as untracked background processes. + try: + if not _IS_WINDOWS: + try: + os.killpg(os.getpgid(proc.pid), signal.SIGKILL) + except (ProcessLookupError, PermissionError, OSError): + proc.kill() + else: + proc.kill() + except Exception: + pass + try: + proc.wait(timeout=5) + except Exception: + pass + raise - self._write_checkpoint() return session def spawn_via_env(