fix: scope tool interrupt signal per-thread to prevent cross-session leaks (#7930)

The interrupt mechanism in tools/interrupt.py used a process-global
threading.Event. In the gateway, multiple agents run concurrently in
the same process via run_in_executor. When any agent was interrupted
(user sends a follow-up message), the global flag killed ALL agents'
running tools — terminal commands, browser ops, web requests — across
all sessions.

Changes:
- tools/interrupt.py: Replace single threading.Event with a set of
  interrupted thread IDs. set_interrupt() targets a specific thread;
  is_interrupted() checks the current thread. Includes a backward-
  compatible _ThreadAwareEventProxy for legacy _interrupt_event usage.
- run_agent.py: Store execution thread ID at start of run_conversation().
  interrupt() and clear_interrupt() pass it to set_interrupt() so only
  this agent's thread is affected.
- tools/code_execution_tool.py: Use is_interrupted() instead of
  directly checking _interrupt_event.is_set().
- tools/process_registry.py: Same — use is_interrupted().
- tests: Update interrupt tests for per-thread semantics. Add new
  TestPerThreadInterruptIsolation with two tests verifying cross-thread
  isolation.
This commit is contained in:
Teknium 2026-04-11 14:02:58 -07:00 committed by GitHub
parent 75380de430
commit dfc820345d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 183 additions and 78 deletions

View file

@ -739,6 +739,7 @@ class AIAgent:
# Interrupt mechanism for breaking out of tool loops
self._interrupt_requested = False
self._interrupt_message = None # Optional message that triggered interrupt
self._execution_thread_id: int | None = None # Set at run_conversation() start
self._client_lock = threading.RLock()
# Subagent delegation state
@ -2832,8 +2833,10 @@ class AIAgent:
"""
self._interrupt_requested = True
self._interrupt_message = message
# Signal all tools to abort any in-flight operations immediately
_set_interrupt(True)
# Signal all tools to abort any in-flight operations immediately.
# Scope the interrupt to this agent's execution thread so other
# agents running in the same process (gateway) are not affected.
_set_interrupt(True, self._execution_thread_id)
# Propagate interrupt to any running child agents (subagent delegation)
with self._active_children_lock:
children_copy = list(self._active_children)
@ -2846,10 +2849,10 @@ class AIAgent:
print("\n⚡ Interrupt requested" + (f": '{message[:40]}...'" if message and len(message) > 40 else f": '{message}'" if message else ""))
def clear_interrupt(self) -> None:
"""Clear any pending interrupt request and the global tool interrupt signal."""
"""Clear any pending interrupt request and the per-thread tool interrupt signal."""
self._interrupt_requested = False
self._interrupt_message = None
_set_interrupt(False)
_set_interrupt(False, self._execution_thread_id)
def _touch_activity(self, desc: str) -> None:
"""Update the last-activity timestamp and description (thread-safe)."""
@ -7799,6 +7802,11 @@ class AIAgent:
compression_attempts = 0
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
# Record the execution thread so interrupt()/clear_interrupt() can
# scope the tool-level interrupt signal to THIS agent's thread only.
# Must be set before clear_interrupt() which uses it.
self._execution_thread_id = threading.current_thread().ident
# Clear any stale interrupt state at start
self.clear_interrupt()