From 8dcab19d02ae76b2793f94c5ccac4f41bc12c4e1 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 29 Apr 2026 19:26:13 -0500 Subject: [PATCH] fix(gateway): fail closed when session.delete can't enumerate active sessions If a concurrent RPC mutates _sessions while session.delete is iterating it (e.g. a parallel session.create on the thread pool), the bare except swallowed the RuntimeError and let the delete proceed against a row that may still be live. Snapshot via list(_sessions.values()) and return an error when even that raises, instead of treating "couldn't check" as "no active sessions." --- tests/test_tui_gateway_server.py | 26 ++++++++++++++++++++++++++ tui_gateway/server.py | 13 +++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 7a597509fb..bae911ac2f 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -2835,6 +2835,32 @@ def test_session_delete_refuses_active_session(monkeypatch): assert called == [], "delete_session must not be called for active sessions" +def test_session_delete_fails_closed_when_active_snapshot_raises(monkeypatch): + """Concurrent ``_sessions`` mutation from another RPC thread can raise + ``RuntimeError: dictionary changed size during iteration``. When the + handler can't enumerate active sessions safely it must refuse the + delete (fail closed) rather than fall through and allow it.""" + + class _DB: + def delete_session(self, *a, **kw): + raise AssertionError("delete must not run when active snapshot fails") + + class _ExplodingDict: + def values(self): + raise RuntimeError("dictionary changed size during iteration") + + monkeypatch.setattr(server, "_get_db", lambda: _DB()) + monkeypatch.setattr(server, "_sessions", _ExplodingDict()) + + resp = server.handle_request( + {"id": "1", "method": "session.delete", "params": {"session_id": "x"}} + ) + + assert "error" in resp + assert resp["error"]["code"] == 5036 + assert "enumerate active sessions" in resp["error"]["message"] + + def test_session_delete_returns_4007_when_missing(monkeypatch): class _DB: def delete_session(self, sid, sessions_dir=None): diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 7ffe834cba..ea7276ce23 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2112,11 +2112,16 @@ def _(rid, params: dict) -> dict: return _db_unavailable_error(rid, code=5036) # Block deletion of any session currently bound to a live TUI session # in this process. The picker hides the active session anyway, but a - # racing caller could still target it. + # racing caller could still target it. Snapshot via ``list(...)`` + # because ``_sessions`` is mutated by concurrent RPCs on the thread + # pool — iterating the dict directly can raise ``RuntimeError: + # dictionary changed size during iteration``. If even the snapshot + # raises, fail closed (refuse the delete) rather than fail open. try: - active = {s.get("session_key") for s in _sessions.values() if s.get("session_key")} - except Exception: - active = set() + snapshot = list(_sessions.values()) + except Exception as e: + return _err(rid, 5036, f"could not enumerate active sessions: {e}") + active = {s.get("session_key") for s in snapshot if s.get("session_key")} if target in active: return _err(rid, 4023, "cannot delete an active session") sessions_dir = get_hermes_home() / "sessions"