From e5253d852b24f87af91d77b66141cc5c0b9e6f69 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 30 Jun 2026 04:23:27 -0500 Subject: [PATCH] fix(desktop): tree-kill Windows terminal descendants Ensure Windows desktop and local terminal teardown kill full process trees so Git Bash descendants cannot survive wrapper exits and accumulate across retries. --- apps/desktop/electron/main.cjs | 37 +++++++++---------- .../electron/windows-child-process.test.cjs | 23 ++++++++++++ tests/tools/test_local_interrupt_cleanup.py | 27 ++++++++++++++ tests/tools/test_process_registry.py | 20 ++++++++++ tools/environments/local.py | 11 +++++- tools/process_registry.py | 22 ++--------- 6 files changed, 101 insertions(+), 39 deletions(-) diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index e800034500b..a6b70872632 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -5108,13 +5108,24 @@ function resetBootProgressForReconnect() { ) } +function stopBackendChild(child) { + if (!child || child.killed) return + try { + if (IS_WINDOWS && Number.isInteger(child.pid)) { + forceKillProcessTree(child.pid) + } else { + child.kill('SIGTERM') + } + } catch { + // Already gone. + } +} + function resetHermesConnection() { connectionPromise = null backendStartFailure = null - if (hermesProcess && !hermesProcess.killed) { - hermesProcess.kill('SIGTERM') - } + stopBackendChild(hermesProcess) hermesProcess = null resetBootProgressForReconnect() @@ -5362,13 +5373,7 @@ function stopPoolBackend(profile) { const entry = backendPool.get(profile) if (!entry) return backendPool.delete(profile) - if (entry.process && !entry.process.killed) { - try { - entry.process.kill('SIGTERM') - } catch { - // Already gone. - } - } + stopBackendChild(entry.process) } async function teardownPoolBackendAndWait(profile) { @@ -5376,13 +5381,7 @@ async function teardownPoolBackendAndWait(profile) { if (!entry) return backendPool.delete(profile) - if (entry.process && !entry.process.killed) { - try { - entry.process.kill('SIGTERM') - } catch { - // Already gone. - } - } + stopBackendChild(entry.process) await waitForBackendExit(entry.process) } @@ -7600,9 +7599,7 @@ app.on('before-quit', () => { disposeTerminalSession(id) } - if (hermesProcess && !hermesProcess.killed) { - hermesProcess.kill('SIGTERM') - } + stopBackendChild(hermesProcess) stopAllPoolBackends() }) diff --git a/apps/desktop/electron/windows-child-process.test.cjs b/apps/desktop/electron/windows-child-process.test.cjs index 473fd0b2e0b..c15dc3b7b50 100644 --- a/apps/desktop/electron/windows-child-process.test.cjs +++ b/apps/desktop/electron/windows-child-process.test.cjs @@ -74,6 +74,29 @@ test('desktop backend launches console python so child consoles are inherited, n requireHiddenChildOptions(source, /hermesProcess = spawn\(\s*backend\.command,\s*backend\.args/) }) +test('desktop backend teardown tree-kills Windows backend descendants', () => { + const source = readElectronFile('main.cjs') + + const helperIndex = source.indexOf('function stopBackendChild(child)') + assert.notEqual(helperIndex, -1, 'missing backend teardown helper') + const helperSnippet = source.slice(helperIndex, helperIndex + 500) + assert.match(helperSnippet, /IS_WINDOWS && Number\.isInteger\(child\.pid\)/) + assert.match(helperSnippet, /forceKillProcessTree\(child\.pid\)/) + assert.match(helperSnippet, /child\.kill\('SIGTERM'\)/) + + const resetIndex = source.indexOf('function resetHermesConnection()') + assert.notEqual(resetIndex, -1, 'missing resetHermesConnection') + const resetSnippet = source.slice(resetIndex, resetIndex + 300) + assert.match(resetSnippet, /stopBackendChild\(hermesProcess\)/) + assert.doesNotMatch(resetSnippet, /hermesProcess\.kill\('SIGTERM'\)/) + + const quitIndex = source.indexOf("app.on('before-quit'") + assert.notEqual(quitIndex, -1, 'missing before-quit handler') + const quitSnippet = source.slice(quitIndex, quitIndex + 900) + assert.match(quitSnippet, /stopBackendChild\(hermesProcess\)/) + assert.doesNotMatch(quitSnippet, /hermesProcess\.kill\('SIGTERM'\)/) +}) + test('intentional or interactive desktop child processes stay documented', () => { const source = readElectronFile('main.cjs') diff --git a/tests/tools/test_local_interrupt_cleanup.py b/tests/tools/test_local_interrupt_cleanup.py index 73b7c76dcb8..29f3d466784 100644 --- a/tests/tools/test_local_interrupt_cleanup.py +++ b/tests/tools/test_local_interrupt_cleanup.py @@ -20,6 +20,7 @@ from types import SimpleNamespace import pytest +from tools.environments import local as local_mod from tools.environments.local import LocalEnvironment @@ -96,6 +97,32 @@ def test_kill_process_uses_cached_pgid_if_wrapper_already_exited(monkeypatch): assert killpg_calls == [(67890, signal.SIGTERM), (67890, 0)] +def test_kill_process_uses_windows_tree_kill(monkeypatch): + """Windows must kill the whole Bash process tree, not just the wrapper.""" + env = object.__new__(LocalEnvironment) + terminate_calls = [] + waits = [] + killed = [] + + def fake_terminate(pid, *, force=False): + terminate_calls.append((pid, force)) + + proc = SimpleNamespace( + pid=12345, + kill=lambda: killed.append(True), + wait=lambda timeout=None: waits.append(timeout), + ) + + monkeypatch.setattr(local_mod, "_IS_WINDOWS", True) + monkeypatch.setattr("gateway.status.terminate_pid", fake_terminate) + + env._kill_process(proc) + + assert terminate_calls == [(12345, True)] + assert waits == [2.0] + assert killed == [] + + def test_wait_for_process_kills_subprocess_on_keyboardinterrupt(): """When KeyboardInterrupt arrives mid-poll, the subprocess group must be killed before the exception is re-raised.""" diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py index 7cd79b0d827..62d491bf82c 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -1103,6 +1103,26 @@ class TestKillProcess: result = registry.kill_process(s.id) assert result["status"] == "already_exited" + def test_kill_local_popen_uses_host_tree_terminator(self, registry, monkeypatch): + s = _make_session(sid="proc_local", command="sleep 999") + s.process = MagicMock() + s.process.pid = 12345 + s.host_start_time = 67890 + registry._running[s.id] = s + terminate_calls = [] + + monkeypatch.setattr( + registry, + "_terminate_host_pid", + lambda pid, expected_start=None: terminate_calls.append((pid, expected_start)), + ) + monkeypatch.setattr(registry, "_write_checkpoint", lambda: None) + + result = registry.kill_process(s.id) + + assert result["status"] == "killed" + assert terminate_calls == [(12345, 67890)] + def test_kill_detached_session_uses_host_pid(self, registry): s = _make_session(sid="proc_detached", command="sleep 999") s.pid = 424242 diff --git a/tools/environments/local.py b/tools/environments/local.py index 50a2522a5a2..9324845a2e7 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -892,7 +892,16 @@ class LocalEnvironment(BaseEnvironment): try: if _IS_WINDOWS: - proc.terminate() + try: + from gateway.status import terminate_pid + + terminate_pid(proc.pid, force=True) + except Exception: + proc.kill() + try: + proc.wait(timeout=2.0) + except (subprocess.TimeoutExpired, OSError): + pass else: try: pgid = os.getpgid(proc.pid) diff --git a/tools/process_registry.py b/tools/process_registry.py index dd887374ec2..5dfa5de7da4 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -1433,24 +1433,10 @@ class ProcessRegistry: if session.pid: os.kill(session.pid, signal.SIGTERM) elif session.process: - # Local process -- kill the process tree - try: - if _IS_WINDOWS: - session.process.terminate() - else: - import psutil - try: - parent = psutil.Process(session.process.pid) - for child in parent.children(recursive=True): - try: - child.terminate() - except psutil.NoSuchProcess: - pass - parent.terminate() - except psutil.NoSuchProcess: - pass - except (ProcessLookupError, PermissionError): - session.process.kill() + # Local process -- kill the process tree. On Windows this + # must be taskkill /T /F; Popen.terminate() only kills the + # shell wrapper and leaves Git Bash descendants behind. + self._terminate_host_pid(session.process.pid, session.host_start_time) elif session.env_ref and session.pid: # Non-local -- kill inside sandbox session.env_ref.execute(f"kill {session.pid} 2>/dev/null", timeout=5)