Port from cline/cline#10343: periodic gateway memory logging (#27102)

Emit a grep-friendly '[MEMORY] rss=...MB ...' line in agent.log /
gateway.log every N minutes (default 5) so slow leaks in the long-lived
gateway process show up as a time series. Based on
https://github.com/cline/cline/pull/10343
(src/standalone/memory-monitor.ts).

- gateway/memory_monitor.py: new module. Daemon thread, baseline on
  start, final snapshot on stop. Uses resource.getrusage() (stdlib)
  first, falls back to psutil, disables itself with one WARNING if
  neither is available.
- gateway/run.py: start monitor right after setup_logging() in
  start_gateway(); stop it in the shutdown block next to MCP teardown.
- hermes_cli/config.py: logging.memory_monitor { enabled, interval_seconds }
  defaults under the existing logging section.
- tests/gateway/test_memory_monitor.py: 10 unit tests covering format,
  baseline/shutdown snapshots, double-start noop, periodic timer,
  daemon thread invariant, and unavailable-RSS warn-and-skip path.

Adapted from TypeScript/Node to Python (threading.Event-based daemon
thread instead of setInterval/unref), added Python-specific gc + thread
counts to the log line (handier than ext/arrayBuffers for diagnosing
Python gateway leaks), and gated behind a config.yaml toggle so users
can silence the periodic line if they want.

No heap-snapshot-on-OOM equivalent — CPython doesn't have V8's
--heapsnapshot-near-heap-limit; tracemalloc would be the Python
equivalent but adds non-trivial overhead, so leaving that out.
This commit is contained in:
Teknium 2026-05-16 12:55:23 -07:00 committed by GitHub
parent fc03c95da1
commit dc3d0fe148
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 398 additions and 0 deletions

230
gateway/memory_monitor.py Normal file
View file

@ -0,0 +1,230 @@
"""Periodic process memory usage logging for the gateway.
Ported from cline/cline#10343 (src/standalone/memory-monitor.ts).
The gateway is a long-lived process that accumulates memory as it caches
agent instances, session transcripts, tool schemas, memory providers, MCP
connections, etc. A slow leak in any of those subsystems is invisible
in a single log line — you only see it by watching RSS climb over hours.
This module emits a single structured ``[MEMORY] ...`` line every N
minutes (default 5) so maintainers investigating a suspected leak can
grep ``agent.log`` / ``gateway.log`` for a time series of RSS + Python
GC stats. The timer runs in a background thread and shuts down cleanly
with the gateway.
Design notes (parity with the Cline port):
* Grep-friendly single-line format beginning ``[MEMORY]``.
* Final snapshot logged on shutdown so "last RSS before exit" is
always in the log.
* Baseline snapshot logged immediately on start.
* Daemon thread never blocks process exit.
* Uses ``resource`` (stdlib, Linux/macOS) first and falls back to
``psutil`` when ``resource`` isn't available (Windows). Both are
optional; when neither works we emit a single WARNING and disable
the monitor rather than crashing the gateway.
Config: ``logging.memory_monitor`` in ``config.yaml`` — see
``hermes_cli/config.py`` for the defaults block.
"""
from __future__ import annotations
import gc
import logging
import os
import sys
import threading
import time
from typing import Optional
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Number of bytes in one MB (1024 * 1024); used as a divisor when
# converting byte counts to whole megabytes.
_BYTES_TO_MB = 1024 * 1024
# --- Monitor state -------------------------------------------------------
# These globals are rebound by start_memory_monitoring() and
# stop_memory_monitoring() while holding ``_lock``.
# The background thread running _monitor_loop(), or None when not started
# (or after a stop).
_monitor_thread: Optional[threading.Thread] = None
# Event that tells the background thread to exit its wait loop.
_stop_event: Optional[threading.Event] = None
# time.monotonic() reading taken when monitoring was last started; read by
# log_memory_usage() to compute the ``uptime=`` field.
_start_time: Optional[float] = None
# Interval most recently passed to start_memory_monitoring(), in seconds.
_interval_seconds: float = 300.0 # 5 minutes
# Guards the monitor state above in start/stop/is_running.
_lock = threading.Lock()
def _get_rss_mb() -> Optional[int]:
    """Report the process's resident set size in whole megabytes.

    Two sources are tried in order:

    1. ``resource.getrusage(RUSAGE_SELF).ru_maxrss`` (stdlib, unix only).
       Note this is the *peak* RSS of the process rather than the
       instantaneous value; it is used here as a cheap stand-in because
       a steadily rising high-water mark is the signal of interest when
       hunting leaks. The raw value is treated as bytes on macOS and as
       kilobytes everywhere else.
    2. ``psutil.Process(...).memory_info().rss`` (optional dependency),
       used when the ``resource`` path fails for any reason.

    Returns
    -------
    Optional[int]
        RSS truncated to an integer number of MB, or ``None`` when
        neither source could be read.
    """
    # Preferred path: stdlib only, no extra dependency.
    try:
        import resource

        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        # macOS reports bytes; Linux and other unices report kilobytes.
        divisor = _BYTES_TO_MB if sys.platform == "darwin" else 1024
        return int(peak / divisor)
    except Exception:
        pass

    # Fallback path: psutil (e.g. on Windows, where resource is missing).
    try:
        import psutil  # type: ignore

        process = psutil.Process(os.getpid())
        return int(process.memory_info().rss / _BYTES_TO_MB)
    except Exception:
        return None
def log_memory_usage(prefix: str = "") -> None:
    """Log current memory usage in a grep-friendly ``[MEMORY] ...`` line.

    The line carries the RSS reading from ``_get_rss_mb()`` (or
    ``rss=unavailable`` when that returns ``None``), the garbage
    collector's per-generation counters, the number of live threads and
    the seconds elapsed since monitoring was last started (``0`` if it
    was never started).

    Can be called on demand at important lifecycle moments (before
    shutdown, after context compression, etc.) as well as from the
    periodic monitor thread.

    Parameters
    ----------
    prefix
        Optional extra tag inserted after ``[MEMORY]``, e.g.
        ``"baseline"`` or ``"shutdown"``.
    """
    rss = _get_rss_mb()

    # Compare against None explicitly rather than relying on truthiness:
    # a start time of 0.0 is a legitimate monotonic reading and must not
    # be mistaken for "monitoring never started".
    uptime = int(time.monotonic() - _start_time) if _start_time is not None else 0

    # gc.get_count() returns the collector's *current* per-generation
    # counters as (gen0, gen1, gen2). These are not cumulative
    # collection statistics (that would be gc.get_stats()).
    try:
        gc_counts = gc.get_count()
    except Exception:
        gc_counts = (0, 0, 0)

    # Thread count is a handy correlate when diagnosing thread leaks.
    try:
        thread_count = threading.active_count()
    except Exception:
        thread_count = 0

    tag = f"{prefix} " if prefix else ""
    if rss is None:
        logger.info(
            "[MEMORY] %srss=unavailable gc=%s threads=%d uptime=%ds",
            tag,
            gc_counts,
            thread_count,
            uptime,
        )
    else:
        logger.info(
            "[MEMORY] %srss=%dMB gc=%s threads=%d uptime=%ds",
            tag,
            rss,
            gc_counts,
            thread_count,
            uptime,
        )
def _monitor_loop(stop_event: threading.Event, interval: float) -> None:
    """Thread body: emit one memory line per ``interval`` seconds.

    Each cycle first waits on ``stop_event`` for up to ``interval``
    seconds. If the event is set (already, or during the wait) the
    function returns without logging; otherwise one snapshot is logged
    and the cycle repeats.

    Parameters
    ----------
    stop_event
        Event whose being set ends the loop.
    interval
        Seconds to wait between snapshots.
    """
    while True:
        # wait() returns True as soon as the event is set, which doubles
        # as an interruptible sleep.
        if stop_event.wait(interval):
            return
        try:
            log_memory_usage()
        except Exception as exc:
            # A failed snapshot must not kill the thread; note it at
            # debug level and try again on the next tick.
            logger.debug("Memory monitor iteration failed: %s", exc)
def start_memory_monitoring(interval_seconds: float = 300.0) -> bool:
    """Start periodic memory usage logging in a daemon thread.

    Logs immediately to capture a baseline, then every
    ``interval_seconds``. Safe to call multiple times — subsequent calls
    are no-ops while the first monitor is still running.

    Parameters
    ----------
    interval_seconds
        How often to log, in seconds. Default 300s (5 minutes), matching
        the upstream cline/cline implementation. Values that are not a
        positive, finite number (zero, negative, NaN, infinity or
        something ``float()`` rejects) are replaced by the 300s default
        and a WARNING is logged.

    Returns
    -------
    bool
        True if a fresh monitor thread was started, False if one was
        already running or if memory introspection isn't available.
    """
    global _monitor_thread, _stop_event, _start_time, _interval_seconds

    default_interval = 300.0

    with _lock:
        if _monitor_thread is not None and _monitor_thread.is_alive():
            return False

        # Sanity-check that we can read RSS at all. If neither resource
        # nor psutil works, no point spinning a thread that can only log
        # "rss=unavailable" forever — warn once and bail.
        if _get_rss_mb() is None:
            logger.warning(
                "[MEMORY] Memory monitoring unavailable: neither resource.getrusage "
                "nor psutil could read process RSS — skipping periodic logging.",
            )
            return False

        # Validate the interval before touching any module state. A
        # timeout <= 0 makes Event.wait() return immediately, which
        # would turn the monitor loop into a busy loop flooding the log;
        # NaN / infinity are not usable timeouts either.
        try:
            interval = float(interval_seconds)
        except (TypeError, ValueError):
            interval = float("nan")
        # NaN fails every comparison, so this single test rejects it too.
        if not (0.0 < interval < float("inf")):
            logger.warning(
                "[MEMORY] Invalid memory monitor interval %r; using %ds instead.",
                interval_seconds,
                int(default_interval),
            )
            interval = default_interval

        _start_time = time.monotonic()
        _interval_seconds = interval
        _stop_event = threading.Event()

        # Baseline snapshot before the loop starts.
        log_memory_usage(prefix="baseline")

        # Daemon thread: it must never keep the process alive on exit.
        _monitor_thread = threading.Thread(
            target=_monitor_loop,
            args=(_stop_event, _interval_seconds),
            name="gateway-memory-monitor",
            daemon=True,
        )
        _monitor_thread.start()
        logger.info(
            "[MEMORY] Periodic memory monitoring started (interval: %ds)",
            int(_interval_seconds),
        )
        return True
def stop_memory_monitoring(timeout: float = 2.0) -> None:
    """Signal the monitor thread to exit and log one last snapshot.

    Does nothing (and logs nothing) when no monitor has been started or
    it has already been stopped.

    Parameters
    ----------
    timeout
        Maximum number of seconds to wait for the monitor thread to
        finish after it has been signalled.
    """
    global _monitor_thread, _stop_event

    with _lock:
        event, worker = _stop_event, _monitor_thread
        if event is None or worker is None:
            return

        # Emit the "last RSS before exit" line while the monitor state
        # is still in place; a logging failure must not abort shutdown.
        try:
            log_memory_usage(prefix="shutdown")
        except Exception:
            pass

        event.set()
        _monitor_thread = _stop_event = None

    # Wait for the thread only after releasing the lock, so a slow
    # thread cannot hold up other callers that need the lock.
    try:
        worker.join(timeout=timeout)
    except Exception:
        pass
    logger.info("[MEMORY] Periodic memory monitoring stopped")
def is_running() -> bool:
    """Report whether a monitor thread exists and is currently alive."""
    with _lock:
        worker = _monitor_thread
        return worker is not None and worker.is_alive()

View file

@ -16800,6 +16800,33 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
from hermes_logging import setup_logging
setup_logging(hermes_home=_hermes_home, mode="gateway")
# Periodic process memory usage logging (gateway only) — emits a
# grep-friendly "[MEMORY] rss=...MB ..." line every N minutes so
# slow leaks in the long-lived gateway process show up as a time
# series in agent.log / gateway.log. Ported from cline/cline#10343.
# Controlled by the logging.memory_monitor section in config.yaml.
try:
from gateway import memory_monitor as _memory_monitor
_mm_cfg = {}
try:
# config is loaded a few lines up; re-read the logging section
# here so we pick up user overrides without coupling to local
# variable names inside the start_gateway body.
from hermes_cli.config import load_config as _load_cli_config
_mm_cfg = (_load_cli_config() or {}).get("logging", {}).get("memory_monitor", {}) or {}
except Exception:
_mm_cfg = {}
if _mm_cfg.get("enabled", True):
try:
_mm_interval = float(_mm_cfg.get("interval_seconds", 300))
except (TypeError, ValueError):
_mm_interval = 300.0
_memory_monitor.start_memory_monitoring(interval_seconds=_mm_interval)
except Exception as _mm_exc:
logger.debug("Failed to start memory monitor: %s", _mm_exc)
# Optional stderr handler — level driven by -v/-q flags on the CLI.
# verbosity=None (-q/--quiet): no stderr output
# verbosity=0 (default): WARNING and above
@ -17016,6 +17043,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
except Exception:
pass
# Stop the periodic memory monitor (if it was started above).
# This also emits one final "[MEMORY] shutdown rss=..." line so the
# last RSS reading before gateway exit is always in the log.
try:
from gateway import memory_monitor as _memory_monitor
_memory_monitor.stop_memory_monitoring(timeout=2.0)
except Exception:
pass
if runner.exit_code is not None:
raise SystemExit(runner.exit_code)