fix: improve interrupt responsiveness during concurrent tool execution and follow-up turns (#10935)

Three targeted fixes for the 'agent stuck on terminal command' report:

1. **Concurrent tool wait loop now checks interrupts** (run_agent.py)
   The sequential path checked _interrupt_requested before each tool call,
   but the concurrent path's wait loop just blocked with 30s timeouts.
   Now polls every 5s and cancels pending futures on interrupt, giving
   already-running tools 3s to notice the per-thread interrupt signal.

2. **Cancelled concurrent tools get proper interrupt messages** (run_agent.py)
   When a concurrent tool is cancelled or didn't return a result due to
   interrupt, the tool result message says 'skipped due to user interrupt'
   instead of a generic error.

3. **Typing indicator fires before follow-up turn** (gateway/run.py)
   After an interrupt is acknowledged and the pending message dequeued,
   the gateway now sends a typing indicator before starting the recursive
   _run_agent call. This gives the user immediate visual feedback that
   the system is processing their new message (closing the perceived
   'dead air' gap between the interrupt ack and the response).

Reported by @_SushantSays.
This commit is contained in:
Teknium 2026-04-16 02:44:56 -07:00 committed by GitHub
parent 3f6c4346ac
commit 333cb8251b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 194 additions and 13 deletions

View file

@ -7549,24 +7549,50 @@ class AIAgent:
# Wait for all to complete with periodic heartbeats so the
# gateway's inactivity monitor doesn't kill us during long
# concurrent tool batches.
# concurrent tool batches. Also check for user interrupts
# so we don't block indefinitely when the user sends /stop
# or a new message during concurrent tool execution.
_conc_start = time.time()
_interrupt_logged = False
while True:
done, not_done = concurrent.futures.wait(
futures, timeout=30.0,
futures, timeout=5.0,
)
if not not_done:
break
# Check for interrupt — the per-thread interrupt signal
# already causes individual tools (terminal, execute_code)
# to abort, but tools without interrupt checks (web_search,
# read_file) will run to completion. Cancel any futures
# that haven't started yet so we don't block on them.
if self._interrupt_requested:
if not _interrupt_logged:
_interrupt_logged = True
self._vprint(
f"{self.log_prefix}⚡ Interrupt: cancelling "
f"{len(not_done)} pending concurrent tool(s)",
force=True,
)
for f in not_done:
f.cancel()
# Give already-running tools a moment to notice the
# per-thread interrupt signal and exit gracefully.
concurrent.futures.wait(not_done, timeout=3.0)
break
_conc_elapsed = int(time.time() - _conc_start)
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
)
# Heartbeat every ~30s (6 × 5s poll intervals)
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
)
finally:
if spinner:
# Build a summary message for the spinner stop
@ -7578,8 +7604,11 @@ class AIAgent:
for i, (tc, name, args) in enumerate(parsed_calls):
r = results[i]
if r is None:
# Shouldn't happen, but safety fallback
function_result = f"Error executing tool '{name}': thread did not return a result"
# Tool was cancelled (interrupt) or thread didn't return
if self._interrupt_requested:
function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
else:
function_result = f"Error executing tool '{name}': thread did not return a result"
tool_duration = 0.0
else:
function_name, function_args, function_result, tool_duration, is_error = r