fix(agent): spawn OpenRouter pre-warm thread only once per process

Each AIAgent.__init__() call unconditionally started a daemon thread to pre-warm the OpenRouter model metadata cache. In gateway mode a new AIAgent is created for every incoming message, so one OS thread leaked per request. After ~1,000 messages the process hit the Linux thread limit, and every subsequent request failed with "RuntimeError: can't start new thread". Add a module-level threading.Event (_openrouter_prewarm_done) that is set before the thread is started. Subsequent AIAgent instantiations skip the spawn entirely; fetch_model_metadata() is cached for 1 hour so the single background call is sufficient. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-16 09:31:37 +00:00 · 2026-04-29 15:47:27 +01:00 · 2026-04-29 15:47:27 +01:00 · a7fb79efb2
commit a7fb79efb2
parent 502debed91
1 changed files with 15 additions and 2 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -323,6 +323,12 @@ _PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
 # Maximum number of concurrent worker threads for parallel tool execution.
 _MAX_TOOL_WORKERS = 8

+# Guard so the OpenRouter metadata pre-warm thread is only spawned once per
+# process, not once per AIAgent instantiation.  Without this, long-running
+# gateway processes leak one OS thread per incoming message and eventually
+# exhaust the system thread limit (RuntimeError: can't start new thread).
+_openrouter_prewarm_done = threading.Event()
+
 # Patterns that indicate a terminal command may modify/delete files.
 _DESTRUCTIVE_PATTERNS = re.compile(
    r"""(?:^|\s|&&|\|\||;|`)(?:
@ -1107,10 +1113,17 @@ class AIAgent:
        # Pre-warm OpenRouter model metadata cache in a background thread.
        # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
        # HTTP request on the first API response when pricing is estimated.
-        if self.provider == "openrouter" or self._is_openrouter_url():
+        # Use a process-level Event so this thread is only spawned once — a new
+        # AIAgent is created for every gateway request, so without the guard
+        # each message leaks one OS thread and the process eventually exhausts
+        # the system thread limit (RuntimeError: can't start new thread).
+        if (self.provider == "openrouter" or self._is_openrouter_url()) and \
+                not _openrouter_prewarm_done.is_set():
+            _openrouter_prewarm_done.set()
            threading.Thread(
-                target=lambda: fetch_model_metadata(),
+                target=fetch_model_metadata,
                daemon=True,
+                name="openrouter-prewarm",
            ).start()

        self.tool_progress_callback = tool_progress_callback