mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(gateway): prune stale SessionStore entries to bound memory + disk (#11789)
SessionStore._entries grew unbounded. Every unique
(platform, chat_id, thread_id, user_id) tuple ever seen was kept in
RAM and rewritten to sessions.json on every message. A Discord bot
in 100 servers x 100 channels x ~100 rotating users accumulates on
the order of 10^5 entries after a few months; each sessions.json
write becomes an O(n) fsync. Nothing trimmed this — there was no
TTL, no cap, no eviction path.
Changes
-------
* SessionStore.prune_old_entries(max_age_days) — drops entries whose
updated_at is older than the cutoff. Preserves:
- suspended entries (user paused them via /stop for later resume)
- entries with an active background process attached
Pruning is functionally identical to a natural reset-policy expiry:
SQLite transcript stays, session_key -> session_id mapping dropped,
returning user gets a fresh session.
* GatewayConfig.session_store_max_age_days (default 90; 0 disables).
Serialized in to_dict/from_dict, coerced from bad types / negatives
to safe defaults. No migration needed — missing field -> 90 days.
* _session_expiry_watcher calls prune_old_entries once per hour
(first tick is immediate). Uses the existing watcher loop so no
new background task is created.
Why not more aggressive
-----------------------
90 days is long enough that legitimate long-idle users (seasonal,
vacation, etc.) aren't surprised — pruning just means they get a
fresh session on return, same outcome they'd get from any other
reset-policy trigger. Admins can lower it via config; 0 disables.
Tests
-----
tests/gateway/test_session_store_prune.py — 17 cases covering:
* entry age based on updated_at, not created_at
* max_age_days=0 disables; negative coerces to 0
* suspended + active-process entries are skipped
* _save fires iff something was removed
* disk JSON reflects post-prune state
* thread safety against concurrent readers
* config field roundtrips + graceful fallback on bad values
* watcher gate logic (first tick prunes, subsequent within 1h don't)
119 broader session/gateway tests remain green.
This commit is contained in:
parent
f362083c64
commit
eb07c05646
4 changed files with 361 additions and 0 deletions
|
|
@ -802,6 +802,57 @@ class SessionStore:
|
|||
return True
|
||||
return False
|
||||
|
||||
def prune_old_entries(self, max_age_days: int) -> int:
|
||||
"""Drop SessionEntry records older than max_age_days.
|
||||
|
||||
Pruning is based on ``updated_at`` (last activity), not ``created_at``.
|
||||
A session that's been active within the window is kept regardless of
|
||||
how old it is. Entries marked ``suspended`` are kept — the user
|
||||
explicitly paused them for later resume. Entries held by an active
|
||||
process (via has_active_processes_fn) are also kept so long-running
|
||||
background work isn't orphaned.
|
||||
|
||||
Pruning is functionally identical to a natural reset-policy expiry:
|
||||
the transcript in SQLite stays, but the session_key → session_id
|
||||
mapping is dropped and the user starts a fresh session on return.
|
||||
|
||||
``max_age_days <= 0`` disables pruning; returns 0 immediately.
|
||||
Returns the number of entries removed.
|
||||
"""
|
||||
if max_age_days is None or max_age_days <= 0:
|
||||
return 0
|
||||
from datetime import timedelta
|
||||
|
||||
cutoff = _now() - timedelta(days=max_age_days)
|
||||
removed_keys: list[str] = []
|
||||
|
||||
with self._lock:
|
||||
self._ensure_loaded_locked()
|
||||
for key, entry in list(self._entries.items()):
|
||||
if entry.suspended:
|
||||
continue
|
||||
# Never prune sessions with an active background process
|
||||
# attached — the user may still be waiting on output.
|
||||
if self._has_active_processes_fn is not None:
|
||||
try:
|
||||
if self._has_active_processes_fn(entry.session_id):
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
if entry.updated_at < cutoff:
|
||||
removed_keys.append(key)
|
||||
for key in removed_keys:
|
||||
self._entries.pop(key, None)
|
||||
if removed_keys:
|
||||
self._save()
|
||||
|
||||
if removed_keys:
|
||||
logger.info(
|
||||
"SessionStore pruned %d entries older than %d days",
|
||||
len(removed_keys), max_age_days,
|
||||
)
|
||||
return len(removed_keys)
|
||||
|
||||
def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
|
||||
"""Mark recently-active sessions as suspended.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue