mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(process_registry): kill orphaned Popen on post-spawn setup failure
After Popen succeeds with os.setsid (detached process group), five steps
run with no try/except: Thread construction, reader.start(), lock
acquisition, prune+register, and the checkpoint write. If any of them
raises, the Popen object is never registered and the detached process
group leaks indefinitely.
Wrap the post-spawn setup in try/except. On failure:
- os.killpg(os.getpgid(proc.pid), signal.SIGKILL) takes down the entire
process group, not just the shell. This matters because the process
group is detached and the -lic shell wrapper may have spawned children.
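- falls back to proc.kill() if killpg raises ProcessLookupError, PermissionError, or OSError (on Windows, proc.kill() is used directly)
- proc.wait(timeout=5) reaps the process, bounded to 5 seconds
- re-raises so the original exception and its traceback are preserved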
The cleanup itself is wrapped in nested try/except blocks so that a
secondary failure during cleanup cannot mask the original exception.
Closes #2749.
This commit is contained in:
parent
c179bdab3c
commit
53ec32819c
2 changed files with 124 additions and 13 deletions
|
|
@ -562,21 +562,42 @@ class ProcessRegistry:
|
|||
session.process = proc
|
||||
session.pid = proc.pid
|
||||
|
||||
# Start output reader thread
|
||||
reader = threading.Thread(
|
||||
target=self._reader_loop,
|
||||
args=(session,),
|
||||
daemon=True,
|
||||
name=f"proc-reader-{session.id}",
|
||||
)
|
||||
session._reader_thread = reader
|
||||
reader.start()
|
||||
try:
|
||||
# Start output reader thread
|
||||
reader = threading.Thread(
|
||||
target=self._reader_loop,
|
||||
args=(session,),
|
||||
daemon=True,
|
||||
name=f"proc-reader-{session.id}",
|
||||
)
|
||||
session._reader_thread = reader
|
||||
reader.start()
|
||||
|
||||
with self._lock:
|
||||
self._prune_if_needed()
|
||||
self._running[session.id] = session
|
||||
with self._lock:
|
||||
self._prune_if_needed()
|
||||
self._running[session.id] = session
|
||||
|
||||
self._write_checkpoint()
|
||||
except Exception:
|
||||
# Post-Popen setup failed — kill the orphaned subprocess (and any
|
||||
# descendants spawned via setsid) before re-raising so they do not
|
||||
# leak as untracked background processes.
|
||||
try:
|
||||
if not _IS_WINDOWS:
|
||||
try:
|
||||
os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
proc.kill()
|
||||
else:
|
||||
proc.kill()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
proc.wait(timeout=5)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
|
||||
self._write_checkpoint()
|
||||
return session
|
||||
|
||||
def spawn_via_env(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue