fix(gateway): close residual memory-leak sites under heavy scheduled workload

Long-lived gateways under heavy cron/build workloads grow steadily in memory
(~18 MB/hr even after the phantom-dispatch fix) and eventually have to be
restarted or get OOM-killed. This commit closes four retention sites, all
confirmed live on current main:

1. _evict_cached_agent() (/model, /reasoning, codex-runtime, /undo, etc.) popped
   the cache entry without releasing the agent's OpenAI client, httpx transport,
   SSL context, or conversation history. Only /new cleaned up first. Now releases
   clients on a daemon thread, matching _enforce_agent_cache_cap.

2. _release_evicted_agent_soft() now clears _session_messages after
   release_clients() — tool outputs (file reads, terminal output, search results)
   can be tens of MB per 100+-tool-call session; the list is rebuilt from
   persisted session JSON on resume, so dropping it on soft eviction is safe.

3. The session-expiry watcher (permanent finalization) now drops the session's
   per-session control dicts (_session_model_overrides, _session_reasoning_overrides,
   _pending_approvals, _update_prompt_pending, _pending_model_notes). These leaked
   one entry per session per gateway lifetime. NOTE: this is the session-finalize
   path, NOT idle agent-cache eviction — an idle-evicted session is still alive and
   rebuilds its agent from these overrides, so pruning them there would silently
   reset a user's /model choice.

4. _tool_defs_cache is now bounded (_TOOL_DEFS_CACHE_MAX=8) with oldest-first
   eviction instead of growing unboundedly across the distinct toolset/config
   fingerprints a gateway sees over its lifetime.

Salvaged from #25318 by Michael Steuer (@mssteuer); fix 3 redirected from the
idle-sweep to the session-finalize lifecycle, magic number 8 lifted to a named
constant, test ported.

Fixes #19251
Co-authored-by: Michael Steuer <michael@make.software>
This commit is contained in:
Michael Steuer 2026-06-08 02:22:34 -07:00 committed by Teknium
parent 400e6e43ca
commit 3d029a53ec
3 changed files with 73 additions and 3 deletions

View file

@ -5215,8 +5215,23 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin):
# be garbage-collected. Otherwise the cache grows
# unbounded across the gateway's lifetime.
self._evict_cached_agent(key)
# Mark as finalized and persist to disk so the flag
# survives gateway restarts.
# Permanently finalizing this session — drop its
# per-session control state so the dicts don't grow
# unbounded across the gateway's lifetime. (Idle
# agent-cache eviction must NOT prune these: the
# session is still alive and a resumed turn rebuilds
# its agent from these overrides. Only true session
# finalization, /new, and /reset clear them.)
self._session_model_overrides.pop(key, None)
self._set_session_reasoning_override(key, None)
if hasattr(self, "_pending_model_notes"):
self._pending_model_notes.pop(key, None)
_pending_approvals = getattr(self, "_pending_approvals", None)
if isinstance(_pending_approvals, dict):
_pending_approvals.pop(key, None)
_update_prompt_pending = getattr(self, "_update_prompt_pending", None)
if isinstance(_update_prompt_pending, dict):
_update_prompt_pending.pop(key, None)
with self.session_store._lock:
entry.expiry_finalized = True
self.session_store._save()
@ -12482,7 +12497,21 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin):
_lock = getattr(self, "_agent_cache_lock", None)
if _lock:
with _lock:
self._agent_cache.pop(session_key, None)
entry = self._agent_cache.pop(session_key, None)
# Release clients on a daemon thread, same as _enforce_agent_cache_cap.
# Without this, every /model, /reasoning, codex-runtime change, and /undo
# leaked a full agent: OpenAI client, httpx transport, SSL context, and
# conversation history. Only the /new path released the agent before
# evicting; every other caller popped the entry and dropped it on the floor.
if entry is not None:
agent = entry[0] if isinstance(entry, tuple) and entry else None
if agent is not None:
threading.Thread(
target=self._release_evicted_agent_soft,
args=(agent,),
daemon=True,
name=f"agent-cache-cmd-evict-{session_key[:24]}",
).start()
@staticmethod
def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None:
@ -12524,6 +12553,13 @@ class GatewayRunner(GatewayKanbanWatchersMixin, GatewaySlashCommandsMixin):
self._cleanup_agent_resources(agent)
except Exception:
pass
# Free conversation history memory — can be tens of MB with tool
# outputs (file reads, terminal output, search results) on heavy
# 100+-tool-call sessions. release_clients() deliberately preserves
# session tool state for resume, but the message list is rebuilt from
# persisted session JSON on the next turn, so dropping it here is safe.
if hasattr(agent, "_session_messages"):
agent._session_messages = []
def _enforce_agent_cache_cap(self) -> None:
"""Evict oldest cached agents when cache exceeds _AGENT_CACHE_MAX_SIZE.

View file

@ -253,6 +253,14 @@ _LEGACY_TOOLSET_MAP = {
# daemon start/stop, env var changes, etc.) on a 30 s horizon.
_tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {}
# Hard cap on memoized get_tool_definitions() results. A long-lived Gateway
# process sees many distinct toolset/config fingerprints over its lifetime
# (per-session toolset sets, config edits, kanban-task toggles); without a
# bound the cache grows unboundedly. 8 comfortably covers the warm working
# set (the handful of distinct platform/toolset combos a gateway actually
# serves) while keeping the cap small. (#19251)
_TOOL_DEFS_CACHE_MAX = 8
def _clear_tool_defs_cache() -> None:
"""Drop memoized get_tool_definitions() results. Called when dynamic
@ -329,6 +337,11 @@ def get_tool_definitions(
# agent inits and providers that enforce unique tool names
# (DeepSeek, Xiaomi MiMo, Moonshot Kimi) reject the request with
# HTTP 400. Mirrors the cache-hit path above. (issue #17335)
# Bound the cache with oldest-first (insertion-order) eviction so a
# long-lived Gateway process doesn't accumulate entries unboundedly across
# the many distinct toolset/config fingerprints it sees over its lifetime
# (#19251).
if len(_tool_defs_cache) >= _TOOL_DEFS_CACHE_MAX:
_tool_defs_cache.pop(next(iter(_tool_defs_cache))) # evict oldest
_tool_defs_cache[cache_key] = result
return list(result)
return result

View file

@ -87,6 +87,27 @@ class TestQuietModeCacheIsolation:
f"baseline={baseline}, final={len(final)}."
)
def test_cache_bounded_by_eviction(self):
    """The tool-definition cache stays at its cap and evicts oldest-first.

    Fills the cache to ``_TOOL_DEFS_CACHE_MAX`` with distinct keys, adds one
    more, and checks that (a) the size is still exactly the cap and (b) the
    entry that was dropped is the first one inserted, with every other
    pre-existing entry still present. A size-only check would also pass for
    an implementation that evicted the newest or an arbitrary entry (#19251).
    """
    cap = model_tools._TOOL_DEFS_CACHE_MAX
    # Start from an empty cache so entries left behind by other tests cannot
    # influence which key counts as "oldest".
    model_tools._tool_defs_cache.clear()

    # Fill cache to the cap with distinct keys by varying enabled_toolsets.
    for i in range(cap):
        model_tools.get_tool_definitions(
            enabled_toolsets=[f"fake_toolset_{i}"], quiet_mode=True,
        )
    assert len(model_tools._tool_defs_cache) == cap

    # Dicts iterate in insertion order, so index 0 is the oldest entry.
    keys_at_cap = list(model_tools._tool_defs_cache)

    # Adding one more must evict the oldest, not clear everything and
    # not grow past the cap.
    model_tools.get_tool_definitions(
        enabled_toolsets=["fake_toolset_overflow"], quiet_mode=True,
    )
    assert len(model_tools._tool_defs_cache) == cap, (
        "Eviction should keep the cache at the cap, not clear it or grow"
    )
    assert keys_at_cap[0] not in model_tools._tool_defs_cache, (
        "The oldest entry should be the one evicted"
    )
    assert all(
        key in model_tools._tool_defs_cache for key in keys_at_cap[1:]
    ), "Every entry except the oldest should survive the eviction"
def test_non_quiet_mode_does_not_use_cache(self):
"""Sanity: quiet_mode=False (TUI path) skips the cache entirely \u2014
explains why the bug only hit Gateway."""