mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(process_registry): kill orphaned Popen on post-spawn setup failure
After Popen succeeds with os.setsid (detached process group), 5 things
happen with no try/except: Thread construction, reader.start(), lock
acquisition, prune+register, checkpoint write. If any raises, the
Popen object goes unregistered and the detached process group leaks
indefinitely.
Wrap the post-spawn setup in try/except. On failure:
- os.killpg(getpgid(pid), SIGKILL) takes down the entire process
group (not just the shell - important because of detached PG +
-lic shell wrapper that may have spawned children)
- proc.kill() fallback for ProcessLookupError/PermissionError/OSError
- proc.wait(timeout=5) reaps with a bound
- re-raise to preserve original traceback
Nested try/except around cleanup so a secondary failure can't mask the
original.
Closes #2749.
This commit is contained in:
parent
c179bdab3c
commit
53ec32819c
2 changed files with 124 additions and 13 deletions
|
|
@ -530,6 +530,96 @@ class TestSpawnEnvSanitization:
|
|||
assert env.commands[2][0] == "cat '/path with spaces/hermes_bg.exit' 2>/dev/null"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Popen leak prevention
|
||||
# =========================================================================
|
||||
|
||||
class TestPopenLeakOnSetupFailure:
|
||||
"""Regression for issue #2749: subprocess orphaned when post-Popen setup raises."""
|
||||
|
||||
def test_popen_killed_when_thread_creation_fails(self, registry):
|
||||
"""If Thread() raises after Popen, proc must be killed — not orphaned."""
|
||||
killed = []
|
||||
|
||||
proc = MagicMock()
|
||||
proc.pid = 9999
|
||||
proc.stdout = iter([])
|
||||
proc.stdin = MagicMock()
|
||||
proc.poll.return_value = None
|
||||
|
||||
def fake_kill():
|
||||
killed.append(True)
|
||||
|
||||
proc.kill = fake_kill
|
||||
proc.wait = MagicMock()
|
||||
|
||||
def boom(*args, **kwargs):
|
||||
raise RuntimeError("Thread creation failed")
|
||||
|
||||
with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \
|
||||
patch("subprocess.Popen", return_value=proc), \
|
||||
patch("threading.Thread", side_effect=boom), \
|
||||
patch.object(registry, "_write_checkpoint"):
|
||||
with pytest.raises(RuntimeError, match="Thread creation failed"):
|
||||
registry.spawn_local("echo hello", cwd="/tmp")
|
||||
|
||||
assert killed, "proc.kill() must be called when post-Popen setup raises"
|
||||
|
||||
def test_popen_killed_when_write_checkpoint_fails(self, registry):
|
||||
"""If _write_checkpoint raises after Popen, proc must still be killed."""
|
||||
killed = []
|
||||
|
||||
proc = MagicMock()
|
||||
proc.pid = 8888
|
||||
proc.stdout = iter([])
|
||||
proc.stdin = MagicMock()
|
||||
proc.poll.return_value = None
|
||||
|
||||
def fake_kill():
|
||||
killed.append(True)
|
||||
|
||||
proc.kill = fake_kill
|
||||
proc.wait = MagicMock()
|
||||
|
||||
fake_thread = MagicMock()
|
||||
|
||||
with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \
|
||||
patch("subprocess.Popen", return_value=proc), \
|
||||
patch("threading.Thread", return_value=fake_thread), \
|
||||
patch.object(registry, "_write_checkpoint", side_effect=OSError("disk full")):
|
||||
with pytest.raises(OSError, match="disk full"):
|
||||
registry.spawn_local("echo hello", cwd="/tmp")
|
||||
|
||||
assert killed, "proc.kill() must be called when _write_checkpoint raises"
|
||||
|
||||
def test_popen_not_killed_on_success(self, registry):
|
||||
"""Successful spawn must NOT kill the process."""
|
||||
killed = []
|
||||
|
||||
proc = MagicMock()
|
||||
proc.pid = 7777
|
||||
proc.stdout = iter([])
|
||||
proc.stdin = MagicMock()
|
||||
proc.poll.return_value = None
|
||||
|
||||
def fake_kill():
|
||||
killed.append(True)
|
||||
|
||||
proc.kill = fake_kill
|
||||
proc.wait = MagicMock()
|
||||
|
||||
fake_thread = MagicMock()
|
||||
|
||||
with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \
|
||||
patch("subprocess.Popen", return_value=proc), \
|
||||
patch("threading.Thread", return_value=fake_thread), \
|
||||
patch.object(registry, "_write_checkpoint"):
|
||||
session = registry.spawn_local("echo hello", cwd="/tmp")
|
||||
|
||||
assert not killed, "proc.kill() must NOT be called on successful spawn"
|
||||
assert session.pid == 7777
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Checkpoint
|
||||
# =========================================================================
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue