fix(gateway): close residual memory-leak sites under heavy scheduled workload

Long-lived gateways under heavy cron/build workloads grow steadily (~18 MB/hr post-phantom-dispatch-fix) and eventually have to be restarted or they run out of memory (OOM).

Four retention sites, all confirmed live on current main:

1. _evict_cached_agent() (/model, /reasoning, codex-runtime, /undo, etc.) popped the cache entry without releasing the agent's OpenAI client, httpx transport, SSL context, or conversation history. Only /new cleaned up first. Now releases clients on a daemon thread, matching _enforce_agent_cache_cap.

2. _release_evicted_agent_soft() now clears _session_messages after release_clients() — tool outputs (file reads, terminal output, search results) can be tens of MB per 100+-tool-call session; the list is rebuilt from persisted session JSON on resume, so dropping it on soft eviction is safe.

3. The session-expiry watcher (permanent finalization) now drops the session's per-session control dicts (_session_model_overrides, _session_reasoning_overrides, _pending_approvals, _update_prompt_pending, _pending_model_notes). These leaked one entry per session per gateway lifetime. NOTE: this is the session-finalize path, NOT idle agent-cache eviction — an idle-evicted session is still alive and rebuilds its agent from these overrides, so pruning them there would silently reset a user's /model choice.

4. _tool_defs_cache is now bounded (_TOOL_DEFS_CACHE_MAX=8) with oldest-first eviction instead of growing unboundedly across the distinct toolset/config fingerprints a gateway sees over its lifetime.

Salvaged from #25318 by Michael Steuer (@mssteuer); fix 3 redirected from the idle-sweep to the session-finalize lifecycle, magic number 8 lifted to a named constant, test ported.

Fixes #19251

Co-authored-by: Michael Steuer <michael@make.software>
2026-07-28 18:19:28 +00:00 · 2026-06-08 02:22:34 -07:00 · 2026-06-08 02:22:34 -07:00 · 3d029a53ec
commit 3d029a53ec
parent 400e6e43ca
3 changed files with 73 additions and 3 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -5215,8 +5215,23 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin):
                        # be garbage-collected.  Otherwise the cache grows
                        # unbounded across the gateway's lifetime.
                        self._evict_cached_agent(key)
-                        # Mark as finalized and persist to disk so the flag
-                        # survives gateway restarts.
+                        # Permanently finalizing this session — drop its
+                        # per-session control state so the dicts don't grow
+                        # unbounded across the gateway's lifetime. (Idle
+                        # agent-cache eviction must NOT prune these: the
+                        # session is still alive and a resumed turn rebuilds
+                        # its agent from these overrides. Only true session
+                        # finalization, /new, and /reset clear them.)
+                        self._session_model_overrides.pop(key, None)
+                        self._set_session_reasoning_override(key, None)
+                        if hasattr(self, "_pending_model_notes"):
+                            self._pending_model_notes.pop(key, None)
+                        _pending_approvals = getattr(self, "_pending_approvals", None)
+                        if isinstance(_pending_approvals, dict):
+                            _pending_approvals.pop(key, None)
+                        _update_prompt_pending = getattr(self, "_update_prompt_pending", None)
+                        if isinstance(_update_prompt_pending, dict):
+                            _update_prompt_pending.pop(key, None)
                        with self.session_store._lock:
                            entry.expiry_finalized = True
                            self.session_store._save()
@ -12482,7 +12497,21 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin):
        _lock = getattr(self, "_agent_cache_lock", None)
        if _lock:
            with _lock:
-                self._agent_cache.pop(session_key, None)
+                entry = self._agent_cache.pop(session_key, None)
+            # Release clients on a daemon thread, same as _enforce_agent_cache_cap.
+            # Without this, every /model, /reasoning, codex-runtime change, and
+            # /undo leaked a full agent: OpenAI client, httpx transport, SSL
+            # context, and conversation history. Only the /new path cleaned up
+            # first; the rest popped the entry and dropped it on the floor.
+            if entry is not None:
+                agent = entry[0] if isinstance(entry, tuple) and entry else None
+                if agent is not None:
+                    threading.Thread(
+                        target=self._release_evicted_agent_soft,
+                        args=(agent,),
+                        daemon=True,
+                        name=f"agent-cache-cmd-evict-{session_key[:24]}",
+                    ).start()

    @staticmethod
    def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None:
@ -12524,6 +12553,13 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin):
                self._cleanup_agent_resources(agent)
        except Exception:
            pass
+        # Free conversation history memory — can be tens of MB with tool
+        # outputs (file reads, terminal output, search results) on heavy
+        # 100+-tool-call sessions. release_clients() deliberately preserves
+        # session tool state for resume, but the message list is rebuilt from
+        # persisted session JSON on the next turn, so dropping it here is safe.
+        if hasattr(agent, "_session_messages"):
+            agent._session_messages = []

    def _enforce_agent_cache_cap(self) -> None:
        """Evict oldest cached agents when cache exceeds _AGENT_CACHE_MAX_SIZE.
--- a/model_tools.py
+++ b/model_tools.py
@ -253,6 +253,14 @@ _LEGACY_TOOLSET_MAP = {
 # daemon start/stop, env var changes, etc.) on a 30 s horizon.
 _tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {}

+# Hard cap on memoized get_tool_definitions() results. A long-lived Gateway
+# process sees many distinct toolset/config fingerprints over its lifetime
+# (per-session toolset sets, config edits, kanban-task toggles); without a
+# bound the cache grows unboundedly. 8 comfortably covers the warm working
+# set (the handful of distinct platform/toolset combos a gateway actually
+# serves) while keeping the cap small. (#19251)
+_TOOL_DEFS_CACHE_MAX = 8
+

 def _clear_tool_defs_cache() -> None:
    """Drop memoized get_tool_definitions() results. Called when dynamic
@ -329,6 +337,11 @@ def get_tool_definitions(
        # agent inits and providers that enforce unique tool names
        # (DeepSeek, Xiaomi MiMo, Moonshot Kimi) reject the request with
        # HTTP 400. Mirrors the cache-hit path above. (issue #17335)
+        # Bound the cache with oldest-first (insertion-order) eviction so a
+        # long-lived Gateway process doesn't accumulate entries unboundedly
+        # across the many distinct toolset/config fingerprints it sees (#19251).
+        if len(_tool_defs_cache) >= _TOOL_DEFS_CACHE_MAX:
+            _tool_defs_cache.pop(next(iter(_tool_defs_cache)))  # evict oldest
        _tool_defs_cache[cache_key] = result
        return list(result)
    return result
--- a/tests/test_get_tool_definitions_cache_isolation.py
+++ b/tests/test_get_tool_definitions_cache_isolation.py
@ -87,6 +87,27 @@ class TestQuietModeCacheIsolation:
            f"baseline={baseline}, final={len(final)}."
        )

+    def test_cache_bounded_by_eviction(self):
+        """The cache evicts the oldest entry when it reaches the cap,
+        keeping the cache bounded instead of growing unbounded over a
+        long-lived Gateway's lifetime (#19251)."""
+        cap = model_tools._TOOL_DEFS_CACHE_MAX
+        # Fill cache to the cap with distinct keys by varying enabled_toolsets.
+        for i in range(cap):
+            model_tools.get_tool_definitions(
+                enabled_toolsets=[f"fake_toolset_{i}"], quiet_mode=True,
+            )
+        assert len(model_tools._tool_defs_cache) == cap
+
+        # Adding one more must evict the oldest, not clear everything and
+        # not grow past the cap.
+        model_tools.get_tool_definitions(
+            enabled_toolsets=["fake_toolset_overflow"], quiet_mode=True,
+        )
+        assert len(model_tools._tool_defs_cache) == cap, (
+            "Eviction should keep the cache at the cap, not clear it or grow"
+        )
+
    def test_non_quiet_mode_does_not_use_cache(self):
        """Sanity: quiet_mode=False (TUI path) skips the cache entirely \u2014
        explains why the bug only hit Gateway."""