mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-20 05:01:30 +00:00
Port from cline/cline#10343: periodic gateway memory logging (#27102)
Emit a grep-friendly '[MEMORY] rss=...MB ...' line in agent.log / gateway.log every N minutes (default 5) so that slow leaks in the long-lived gateway process show up as a time series. Based on https://github.com/cline/cline/pull/10343 (src/standalone/memory-monitor.ts).

- gateway/memory_monitor.py: new module. Daemon thread, baseline on start, final snapshot on stop. Uses resource.getrusage() (stdlib) first, falls back to psutil, and disables itself with one WARNING if neither is available.
- gateway/run.py: start the monitor right after setup_logging() in start_gateway(); stop it in the shutdown block next to MCP teardown.
- hermes_cli/config.py: logging.memory_monitor { enabled, interval_seconds } defaults under the existing logging section.
- tests/gateway/test_memory_monitor.py: 10 unit tests covering format, baseline/shutdown snapshots, double-start noop, periodic timer, daemon thread invariant, and the unavailable-RSS warn-and-skip path.

Adapted from TypeScript/Node to Python (threading.Event-based daemon thread instead of setInterval/unref), added Python-specific gc + thread counts to the log line (handier than ext/arrayBuffers for diagnosing Python gateway leaks), and gated behind a config.yaml toggle so users can silence the periodic line if they want.

No heap-snapshot-on-OOM equivalent — CPython doesn't have V8's --heapsnapshot-near-heap-limit; tracemalloc would be the Python equivalent, but it adds non-trivial overhead, so it is left out.
This commit is contained in:
parent
fc03c95da1
commit
dc3d0fe148
4 changed files with 398 additions and 0 deletions
|
|
@ -16800,6 +16800,33 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
from hermes_logging import setup_logging
|
||||
setup_logging(hermes_home=_hermes_home, mode="gateway")
|
||||
|
||||
# Periodic process memory usage logging (gateway only) — emits a
|
||||
# grep-friendly "[MEMORY] rss=...MB ..." line every N minutes so
|
||||
# slow leaks in the long-lived gateway process show up as a time
|
||||
# series in agent.log / gateway.log. Ported from cline/cline#10343.
|
||||
# Controlled by the logging.memory_monitor section in config.yaml.
|
||||
try:
|
||||
from gateway import memory_monitor as _memory_monitor
|
||||
|
||||
_mm_cfg = {}
|
||||
try:
|
||||
# config is loaded a few lines up; re-read the logging section
|
||||
# here so we pick up user overrides without coupling to local
|
||||
# variable names inside the start_gateway body.
|
||||
from hermes_cli.config import load_config as _load_cli_config
|
||||
|
||||
_mm_cfg = (_load_cli_config() or {}).get("logging", {}).get("memory_monitor", {}) or {}
|
||||
except Exception:
|
||||
_mm_cfg = {}
|
||||
if _mm_cfg.get("enabled", True):
|
||||
try:
|
||||
_mm_interval = float(_mm_cfg.get("interval_seconds", 300))
|
||||
except (TypeError, ValueError):
|
||||
_mm_interval = 300.0
|
||||
_memory_monitor.start_memory_monitoring(interval_seconds=_mm_interval)
|
||||
except Exception as _mm_exc:
|
||||
logger.debug("Failed to start memory monitor: %s", _mm_exc)
|
||||
|
||||
# Optional stderr handler — level driven by -v/-q flags on the CLI.
|
||||
# verbosity=None (-q/--quiet): no stderr output
|
||||
# verbosity=0 (default): WARNING and above
|
||||
|
|
@ -17016,6 +17043,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Stop the periodic memory monitor (if it was started above).
|
||||
# This also emits one final "[MEMORY] shutdown rss=..." line so the
|
||||
# last RSS reading before gateway exit is always in the log.
|
||||
try:
|
||||
from gateway import memory_monitor as _memory_monitor
|
||||
|
||||
_memory_monitor.stop_memory_monitoring(timeout=2.0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if runner.exit_code is not None:
|
||||
raise SystemExit(runner.exit_code)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue