fix(tui): route completion RPCs to the pool so they can't freeze the TUI (#53895)

complete.path and complete.slash ran inline on the tui_gateway stdin
reader thread. complete.path spawns git ls-files and fuzzy-ranks the
whole repo; complete.slash does first-call prompt_toolkit imports plus a
skill-dir scan. While either ran, prompt.submit / session.interrupt sat
unread in the stdin pipe, freezing the TUI until the 120s RPC timeout
fired — most reliably reproduced by typing @ on a large repo / WSL2 mount.

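Add both to _LONG_HANDLERS so completion runs on the existing thread
pool instead of the stdin reader thread. Responses written from pool
threads are safe because write_json is already guarded by _stdout_lock.
This is a root-cause fix: it covers any slow completion, not just the
bare-@ trigger.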

Fixes #21123
This commit is contained in:
Teknium 2026-06-27 19:06:01 -07:00 committed by GitHub
parent ccf526964a
commit a94f657a50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 44 additions and 0 deletions

View file

@ -1736,3 +1736,37 @@ def test_dispatch_unknown_long_method_still_goes_inline(server):
resp = server.dispatch({"id": "r4", "method": "some.method", "params": {}})
assert resp["result"] == {"ok": True}
@pytest.mark.parametrize("completion_method", ["complete.path", "complete.slash"])
def test_completion_handlers_are_pool_routed(completion_method, server):
    """Both completion RPCs must be listed among the pool-routed handlers.

    Regression guard for #21123: completion used to run inline on the reader
    thread, so a slow git ls-files / skill-scan blocked prompt.submit and
    froze the TUI for the 120s RPC timeout.
    """
    pool_routed = server._LONG_HANDLERS
    assert completion_method in pool_routed
@pytest.mark.parametrize("completion_method", ["complete.path", "complete.slash"])
def test_slow_completion_does_not_block_fast_handler(completion_method, server):
    """A slow completion RPC must not block a concurrent fast handler (#21123).

    The completion handler is swapped for a stub that parks on an event, then
    a trivial ``fast.ping`` handler is dispatched while the completion is
    still in flight. The fast handler must answer well inside the 0.5s budget.
    """
    released = threading.Event()

    def slow_completion(rid, params):
        # Park until the test releases the event; the timeout bounds the wait
        # so a forgotten release cannot hang the worker forever.
        released.wait(timeout=5)
        return server._ok(rid, {"items": []})

    server._methods[completion_method] = slow_completion
    server._methods["fast.ping"] = lambda rid, params: server._ok(rid, {"pong": True})

    try:
        t0 = time.monotonic()
        # The test expects no synchronous response for the completion RPC
        # (presumably it is delivered later from the pool -- confirm against
        # dispatch's implementation).
        assert server.dispatch({"id": "slow", "method": completion_method, "params": {}}) is None
        fast_resp = server.dispatch({"id": "fast", "method": "fast.ping", "params": {}})
        fast_elapsed = time.monotonic() - t0

        assert fast_resp["result"] == {"pong": True}
        assert fast_elapsed < 0.5, f"fast handler blocked for {fast_elapsed:.2f}s behind {completion_method}"
    finally:
        # Always release the stub, even when an assertion above fails, so the
        # parked handler is not left waiting out its 5s timeout while later
        # tests run.
        released.set()

View file

@ -178,6 +178,16 @@ _LONG_HANDLERS = frozenset(
"billing.step_up",
"browser.manage",
"cli.exec",
# Completion RPCs run inline on the reader thread by default, but both
# can block it for seconds: complete.path spawns `git ls-files` and
# fuzzy-ranks the whole repo (slow on large repos / WSL2 mounts), and
# complete.slash does first-call prompt_toolkit imports + a skill-dir
# scan. While either runs inline, prompt.submit / session.interrupt sit
# unread in the stdin pipe — the TUI appears frozen until the 120s RPC
# timeout fires (#21123). Routing them to the pool keeps the fast path
# responsive; completion is read-only and write_json is lock-guarded.
"complete.path",
"complete.slash",
"llm.oneshot",
# Pet RPCs hit the network (manifest fetch / spritesheet download) or do
# per-frame PNG decode/encode (pet.cells): inline they serialize on the