mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Fold the slash-worker subprocess close into _finalize_session itself — the single _finalized-guarded session-end chokepoint — instead of relying on each caller (_teardown_session, _shutdown_sessions) to close it separately. A future code path that finalizes a session directly can no longer reintroduce the #38095 worker leak. Idempotent: _SlashWorker.close() is poll()-guarded and _finalize_session short-circuits on _finalized, so the existing teardown paths are unaffected. Drops the now-redundant separate close() in _shutdown_sessions. Note: the active leak this issue reported was already fixed on main (WS-orphan reaper #38591, _restart_slash_worker close, atexit shutdown). This addresses the residual defense-in-depth gap the reporter correctly identified in their follow-up comment.
This commit is contained in:
parent
5e06c9ffef
commit
a3fca26c56
2 changed files with 53 additions and 16 deletions
|
|
@ -949,6 +949,37 @@ def test_ws_orphan_reap_closes_worker_when_session_stays_detached(monkeypatch):
|
|||
server._sessions.pop("orphan-sid", None)
|
||||
|
||||
|
||||
def test_finalize_session_closes_slash_worker(monkeypatch):
|
||||
"""_finalize_session closes the slash_worker subprocess itself.
|
||||
|
||||
Regression for #38095: the worker cleanup used to live only in the
|
||||
callers (_teardown_session / _shutdown_sessions), so any code path that
|
||||
finalized a session without going through them leaked the worker. Folding
|
||||
close() into the single _finalized-guarded chokepoint makes the cleanup
|
||||
defense-in-depth and idempotent.
|
||||
"""
|
||||
closed = {"count": 0}
|
||||
|
||||
class _FakeWorker:
|
||||
def close(self):
|
||||
closed["count"] += 1
|
||||
|
||||
monkeypatch.setattr(server, "_notify_session_boundary", lambda *a, **k: None)
|
||||
monkeypatch.setattr(server, "_get_db", lambda: None)
|
||||
|
||||
session = _session(slash_worker=_FakeWorker())
|
||||
|
||||
server._finalize_session(session)
|
||||
assert closed["count"] == 1
|
||||
assert session.get("_finalized") is True
|
||||
|
||||
# Idempotent: a second finalize (or a follow-up teardown) must not
|
||||
# re-close the worker — the _finalized guard short-circuits.
|
||||
server._finalize_session(session)
|
||||
server._teardown_session(session)
|
||||
assert closed["count"] == 1
|
||||
|
||||
|
||||
def test_ws_orphan_reap_spares_reattached_session(monkeypatch):
|
||||
"""A session that rebinds a live transport is NOT considered orphaned."""
|
||||
|
||||
|
|
|
|||
|
|
@ -348,14 +348,31 @@ def _finalize_session(session: dict | None, end_reason: str = "tui_close") -> No
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Close the slash-worker subprocess as part of finalize itself, not just
|
||||
# in the callers. Defense-in-depth: every session-end path goes through
|
||||
# _finalize_session (it's the single ``_finalized``-guarded chokepoint), so
|
||||
# folding worker cleanup in here means a future code path that calls
|
||||
# _finalize_session directly — without the surrounding _teardown_session /
|
||||
# _shutdown_sessions worker.close() — can't reintroduce the #38095 leak.
|
||||
# Idempotent: _SlashWorker.close() is poll()-guarded, so the explicit
|
||||
# close() still in those callers is harmless.
|
||||
try:
|
||||
worker = session.get("slash_worker")
|
||||
if worker:
|
||||
worker.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _teardown_session(session: dict | None) -> None:
|
||||
"""Fully tear down a session: finalize, unregister, close agent + worker.
|
||||
|
||||
Shared by ``session.close`` and the orphaned-WS-session reaper so the
|
||||
slash-worker subprocess is always closed exactly once via the same path.
|
||||
Idempotent: the ``_finalized`` guard in ``_finalize_session`` and the
|
||||
``poll()`` guard in ``_SlashWorker.close`` make repeat calls harmless.
|
||||
Shared by ``session.close`` and the orphaned-WS-session reaper. The
|
||||
slash-worker subprocess is closed inside ``_finalize_session`` (the single
|
||||
finalize chokepoint); this still unregisters the approval notifier and
|
||||
closes the in-process agent. Idempotent: the ``_finalized`` guard in
|
||||
``_finalize_session`` and the ``poll()`` guard in ``_SlashWorker.close``
|
||||
make repeat calls harmless.
|
||||
"""
|
||||
if not session:
|
||||
return
|
||||
|
|
@ -372,12 +389,6 @@ def _teardown_session(session: dict | None) -> None:
|
|||
agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
worker = session.get("slash_worker")
|
||||
if worker:
|
||||
worker.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _ws_session_is_orphaned(session: dict | None) -> bool:
|
||||
|
|
@ -425,13 +436,8 @@ def _shutdown_sessions() -> None:
|
|||
with _sessions_lock:
|
||||
snapshot = list(_sessions.values())
|
||||
for session in snapshot:
|
||||
# _finalize_session closes the slash-worker subprocess too.
|
||||
_finalize_session(session, end_reason="tui_shutdown")
|
||||
try:
|
||||
worker = session.get("slash_worker")
|
||||
if worker:
|
||||
worker.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
atexit.register(_shutdown_sessions)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue