From 3d029a53ec327fd7a628cde900bf300ca9a0cc0f Mon Sep 17 00:00:00 2001 From: Michael Steuer Date: Mon, 8 Jun 2026 02:22:34 -0700 Subject: [PATCH] fix(gateway): close residual memory-leak sites under heavy scheduled workload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Long-lived gateways under heavy cron/build workloads grow steadily (~18 MB/hr post-phantom-dispatch-fix) and eventually need a restart-or-OOM. Four retention sites, all confirmed live on current main: 1. _evict_cached_agent() (/model, /reasoning, codex-runtime, /undo, etc.) popped the cache entry without releasing the agent's OpenAI client, httpx transport, SSL context, or conversation history. Only /new cleaned up first. Now releases clients on a daemon thread, matching _enforce_agent_cache_cap. 2. _release_evicted_agent_soft() now clears _session_messages after release_clients() — tool outputs (file reads, terminal output, search results) can be tens of MB per 100+-tool-call session; the list is rebuilt from persisted session JSON on resume, so dropping it on soft eviction is safe. 3. The session-expiry watcher (permanent finalization) now drops the session's per-session control dicts (_session_model_overrides, _session_reasoning_overrides, _pending_approvals, _update_prompt_pending, _pending_model_notes). These leaked one entry per session per gateway lifetime. NOTE: this is the session-finalize path, NOT idle agent-cache eviction — an idle-evicted session is still alive and rebuilds its agent from these overrides, so pruning them there would silently reset a user's /model choice. 4. _tool_defs_cache is now bounded (_TOOL_DEFS_CACHE_MAX=8) with oldest-first eviction instead of growing unboundedly across the distinct toolset/config fingerprints a gateway sees over its lifetime. Salvaged from #25318 by Michael Steuer (@mssteuer); fix 3 redirected from the idle-sweep to the session-finalize lifecycle, magic number 8 lifted to a named constant, test ported. Fixes #19251 Co-authored-by: Michael Steuer --- gateway/run.py | 42 +++++++++++++++++-- model_tools.py | 13 ++++++ ...st_get_tool_definitions_cache_isolation.py | 21 ++++++++++ 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 8d512bf3b3c..739ba484909 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -5215,8 +5215,23 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin): # be garbage-collected. Otherwise the cache grows # unbounded across the gateway's lifetime. self._evict_cached_agent(key) - # Mark as finalized and persist to disk so the flag - # survives gateway restarts. + # Permanently finalizing this session — drop its + # per-session control state so the dicts don't grow + # unbounded across the gateway's lifetime. (Idle + # agent-cache eviction must NOT prune these: the + # session is still alive and a resumed turn rebuilds + # its agent from these overrides. Only true session + # finalization, /new, and /reset clear them.) + self._session_model_overrides.pop(key, None) + self._set_session_reasoning_override(key, None) + if hasattr(self, "_pending_model_notes"): + self._pending_model_notes.pop(key, None) + _pending_approvals = getattr(self, "_pending_approvals", None) + if isinstance(_pending_approvals, dict): + _pending_approvals.pop(key, None) + _update_prompt_pending = getattr(self, "_update_prompt_pending", None) + if isinstance(_update_prompt_pending, dict): + _update_prompt_pending.pop(key, None) with self.session_store._lock: entry.expiry_finalized = True self.session_store._save() @@ -12482,7 +12497,21 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin): _lock = getattr(self, "_agent_cache_lock", None) if _lock: with _lock: - self._agent_cache.pop(session_key, None) + entry = self._agent_cache.pop(session_key, None) + # Release clients on a daemon thread, same as _enforce_agent_cache_cap. + # Without this, every /new, /model, /reasoning, codex-runtime change, + # and /undo leaked a full agent: OpenAI client, httpx transport, SSL + # context, and conversation history. Only the /new path cleaned up + # first; the rest popped the entry and dropped it on the floor. + if entry is not None: + agent = entry[0] if isinstance(entry, tuple) and entry else None + if agent is not None: + threading.Thread( + target=self._release_evicted_agent_soft, + args=(agent,), + daemon=True, + name=f"agent-cache-cmd-evict-{session_key[:24]}", + ).start() @staticmethod def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None: @@ -12524,6 +12553,13 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin): self._cleanup_agent_resources(agent) except Exception: pass + # Free conversation history memory — can be tens of MB with tool + # outputs (file reads, terminal output, search results) on heavy + # 100+-tool-call sessions. release_clients() deliberately preserves + # session tool state for resume, but the message list is rebuilt from + # persisted session JSON on the next turn, so dropping it here is safe. + if hasattr(agent, "_session_messages"): + agent._session_messages = [] def _enforce_agent_cache_cap(self) -> None: """Evict oldest cached agents when cache exceeds _AGENT_CACHE_MAX_SIZE. diff --git a/model_tools.py b/model_tools.py index 9d04ada2d75..22719a5daef 100644 --- a/model_tools.py +++ b/model_tools.py @@ -253,6 +253,14 @@ _LEGACY_TOOLSET_MAP = { # daemon start/stop, env var changes, etc.) on a 30 s horizon. _tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {} +# Hard cap on memoized get_tool_definitions() results. A long-lived Gateway +# process sees many distinct toolset/config fingerprints over its lifetime +# (per-session toolset sets, config edits, kanban-task toggles); without a +# bound the cache grows unboundedly. 8 comfortably covers the warm working +# set (the handful of distinct platform/toolset combos a gateway actually +# serves) while keeping the cap small. (#19251) +_TOOL_DEFS_CACHE_MAX = 8 + def _clear_tool_defs_cache() -> None: """Drop memoized get_tool_definitions() results. Called when dynamic @@ -329,6 +337,11 @@ def get_tool_definitions( # agent inits and providers that enforce unique tool names # (DeepSeek, Xiaomi MiMo, Moonshot Kimi) reject the request with # HTTP 400. Mirrors the cache-hit path above. (issue #17335) + # Bound the cache with LRU eviction so a long-lived Gateway process + # doesn't accumulate entries unboundedly across the many distinct + # toolset/config fingerprints it sees over its lifetime (#19251). + if len(_tool_defs_cache) >= _TOOL_DEFS_CACHE_MAX: + _tool_defs_cache.pop(next(iter(_tool_defs_cache))) # evict oldest _tool_defs_cache[cache_key] = result return list(result) return result diff --git a/tests/test_get_tool_definitions_cache_isolation.py b/tests/test_get_tool_definitions_cache_isolation.py index b92ef9dc454..bf131804e04 100644 --- a/tests/test_get_tool_definitions_cache_isolation.py +++ b/tests/test_get_tool_definitions_cache_isolation.py @@ -87,6 +87,27 @@ class TestQuietModeCacheIsolation: f"baseline={baseline}, final={len(final)}." ) + def test_cache_bounded_by_eviction(self): + """The cache evicts the oldest entry when it reaches the cap, + keeping the cache bounded instead of growing unbounded over a + long-lived Gateway's lifetime (#19251).""" + cap = model_tools._TOOL_DEFS_CACHE_MAX + # Fill cache to the cap with distinct keys by varying enabled_toolsets. + for i in range(cap): + model_tools.get_tool_definitions( + enabled_toolsets=[f"fake_toolset_{i}"], quiet_mode=True, + ) + assert len(model_tools._tool_defs_cache) == cap + + # Adding one more must evict the oldest, not clear everything and + # not grow past the cap. + model_tools.get_tool_definitions( + enabled_toolsets=["fake_toolset_overflow"], quiet_mode=True, + ) + assert len(model_tools._tool_defs_cache) == cap, ( + "Eviction should keep the cache at the cap, not clear it or grow" + ) + def test_non_quiet_mode_does_not_use_cache(self): """Sanity: quiet_mode=False (TUI path) skips the cache entirely \u2014 explains why the bug only hit Gateway."""