From 48f82448735ea33a4eff74f290298db885000324 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 21 Apr 2026 14:57:23 -0500 Subject: [PATCH] fix(tui): route skills.manage through the long-handler thread pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `/skills browse` is documented to scan 6 sources and take ~15s, but the gateway dispatched `skills.manage` on the main RPC thread. While it ran, every other inbound RPC — completions, new slash commands, even `approval.respond` — blocked until the HTTP fetches finished, making the whole TUI feel frozen. Reported during TUI v2 retest: "/skills browse blocks everything else". `_LONG_HANDLERS` already exists precisely for this pattern (slash.exec, shell.exec, session.resume, etc. run on `_pool`). Add `skills.manage` to that set so browse/search/install run off the dispatcher; the fast `list` / `inspect` actions pay a negligible thread-pool hop. --- tui_gateway/server.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 20564af65..935e9c617 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -40,13 +40,22 @@ _SLASH_WORKER_TIMEOUT_S = max(5.0, float(os.environ.get("HERMES_TUI_SLASH_TIMEOU # ── Async RPC dispatch (#12546) ────────────────────────────────────── # A handful of handlers block the dispatcher loop in entry.py for seconds # to minutes (slash.exec, cli.exec, shell.exec, session.resume, -# session.branch). While they're running, inbound RPCs — notably -# approval.respond and session.interrupt — sit unread in the stdin pipe. -# We route only those slow handlers onto a small thread pool; everything -# else stays on the main thread so ordering stays sane for the fast path. -# write_json is already _stdout_lock-guarded, so concurrent response -# writes are safe. -_LONG_HANDLERS = frozenset({"cli.exec", "session.branch", "session.resume", "shell.exec", "slash.exec"}) +# session.branch, skills.manage). While they're running, inbound RPCs — +# notably approval.respond and session.interrupt — sit unread in the +# stdin pipe. We route only those slow handlers onto a small thread pool; +# everything else stays on the main thread so ordering stays sane for the +# fast path. write_json is already _stdout_lock-guarded, so concurrent +# response writes are safe. +_LONG_HANDLERS = frozenset( + { + "cli.exec", + "session.branch", + "session.resume", + "shell.exec", + "skills.manage", + "slash.exec", + } +) _pool = concurrent.futures.ThreadPoolExecutor( max_workers=max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4)),