mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix: stale background processes no longer permanently block session reset
Background processes (e.g. an http.server preview) that Hermes starts and then forgets about previously blocked session idle/daily reset indefinitely. The reset guard in session.py called has_active_for_session() with no maximum age, so a 3-day-old preview server blocked reset exactly like a task started 30 seconds ago.

Changes:
- Add a max_active_age parameter to has_active_for_session() in process_registry.py. Processes older than this threshold are ignored.
- Add the MAX_ACTIVE_PROCESS_AGE constant (24h / 86400s).
- Wire max_active_age into the gateway's session store callback in run.py so stale processes no longer block the session lifecycle.
- Add debug logging when a reset is skipped due to active processes.
- Add 3 tests covering the recent, stale, and legacy (None) max-age cases.

Fixes #29177
This commit is contained in:
parent
8c8967a50b
commit
33d8b66d5b
4 changed files with 59 additions and 6 deletions
|
|
@@ -2660,11 +2660,15 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
self._provider_routing = self._load_provider_routing()
|
||||
self._fallback_model = self._load_fallback_model()
|
||||
|
||||
# Wire process registry into session store for reset protection
|
||||
from tools.process_registry import process_registry
|
||||
# Wire process registry into session store for reset protection.
|
||||
# Processes older than MAX_ACTIVE_PROCESS_AGE (24h) are treated as
|
||||
# stale and no longer block session idle / daily reset — see #29177.
|
||||
from tools.process_registry import MAX_ACTIVE_PROCESS_AGE, process_registry
|
||||
self.session_store = SessionStore(
|
||||
self.config.sessions_dir, self.config,
|
||||
has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
|
||||
has_active_processes_fn=lambda key: process_registry.has_active_for_session(
|
||||
key, max_active_age=MAX_ACTIVE_PROCESS_AGE,
|
||||
),
|
||||
)
|
||||
self.delivery_router = DeliveryRouter(self.config)
|
||||
self._running = False
|
||||
|
|
|
|||
|
|
@@ -924,6 +924,10 @@ class SessionStore:
|
|||
"""
|
||||
if self._has_active_processes_fn:
|
||||
if self._has_active_processes_fn(entry.session_key):
|
||||
logger.debug(
|
||||
"Session %s not expired — active background processes",
|
||||
entry.session_key,
|
||||
)
|
||||
return False
|
||||
|
||||
policy = self.config.get_reset_policy(
|
||||
|
|
@@ -965,6 +969,10 @@ class SessionStore:
|
|||
if self._has_active_processes_fn:
|
||||
session_key = self._generate_session_key(source)
|
||||
if self._has_active_processes_fn(session_key):
|
||||
logger.debug(
|
||||
"Session reset skipped for %s — active background processes",
|
||||
session_key,
|
||||
)
|
||||
return None
|
||||
|
||||
policy = self.config.get_reset_policy(
|
||||
|
|
|
|||
|
|
@@ -16,6 +16,7 @@ from tools.process_registry import (
|
|||
ProcessSession,
|
||||
FINISHED_TTL_SECONDS,
|
||||
MAX_PROCESSES,
|
||||
MAX_ACTIVE_PROCESS_AGE,
|
||||
)
|
||||
|
||||
|
||||
|
|
@@ -444,6 +445,27 @@ class TestActiveQueries:
|
|||
assert registry.has_active_for_session("gw_session_1") is True
|
||||
assert registry.has_active_for_session("other") is False
|
||||
|
||||
def test_has_active_for_session_with_max_age_recent(self, registry):
|
||||
"""Recent process is considered active when max_active_age is set."""
|
||||
s = _make_session(started_at=time.time() - 100)
|
||||
s.session_key = "gw_session_1"
|
||||
registry._running[s.id] = s
|
||||
assert registry.has_active_for_session("gw_session_1", max_active_age=3600) is True
|
||||
|
||||
def test_has_active_for_session_with_max_age_stale(self, registry):
|
||||
"""Stale process (older than max_active_age) is ignored."""
|
||||
s = _make_session(started_at=time.time() - 90000) # 25 hours ago
|
||||
s.session_key = "gw_session_1"
|
||||
registry._running[s.id] = s
|
||||
assert registry.has_active_for_session("gw_session_1", max_active_age=86400) is False
|
||||
|
||||
def test_has_active_for_session_max_age_none_preserves_legacy(self, registry):
|
||||
"""Without max_active_age, any running process blocks (legacy behaviour)."""
|
||||
s = _make_session(started_at=time.time() - 90000) # 25 hours ago
|
||||
s.session_key = "gw_session_1"
|
||||
registry._running[s.id] = s
|
||||
assert registry.has_active_for_session("gw_session_1") is True
|
||||
|
||||
def test_exited_not_active(self, registry):
|
||||
s = _make_session(task_id="t1", exited=True, exit_code=0)
|
||||
registry._finished[s.id] = s
|
||||
|
|
|
|||
|
|
@@ -58,6 +58,7 @@ CHECKPOINT_PATH = get_hermes_home() / "processes.json"
|
|||
MAX_OUTPUT_CHARS = 200_000 # 200KB rolling output buffer
|
||||
FINISHED_TTL_SECONDS = 1800 # Keep finished processes for 30 minutes
|
||||
MAX_PROCESSES = 64 # Max concurrent tracked processes (LRU pruning)
|
||||
MAX_ACTIVE_PROCESS_AGE = 86400 # 24h — stale processes no longer block session reset
|
||||
|
||||
# Watch pattern rate limiting — PER SESSION.
|
||||
# Hard rule: at most ONE watch-match notification every WATCH_MIN_INTERVAL_SECONDS.
|
||||
|
|
@@ -1567,17 +1568,35 @@ class ProcessRegistry:
|
|||
for s in self._running.values()
|
||||
)
|
||||
|
||||
def has_active_for_session(self, session_key: str) -> bool:
|
||||
"""Check if there are active processes for a gateway session key."""
|
||||
def has_active_for_session(
|
||||
self, session_key: str, max_active_age: Optional[float] = None,
|
||||
) -> bool:
|
||||
"""Check if there are active processes for a gateway session key.
|
||||
|
||||
When *max_active_age* is set (seconds), processes that started more
|
||||
than that many seconds ago are **ignored** — they are still running
|
||||
but are considered stale and must not block session idle / daily
|
||||
reset. This prevents a forgotten ``http.server`` (or any long-lived
|
||||
preview process) from permanently freezing the session lifecycle.
|
||||
|
||||
Args:
|
||||
session_key: Gateway session key to check.
|
||||
max_active_age: If set, ignore processes older than this many
|
||||
seconds. ``None`` retains the legacy behaviour (any running
|
||||
process blocks).
|
||||
"""
|
||||
with self._lock:
|
||||
sessions = list(self._running.values())
|
||||
|
||||
for session in sessions:
|
||||
self._refresh_detached_session(session)
|
||||
|
||||
now = time.time()
|
||||
with self._lock:
|
||||
return any(
|
||||
s.session_key == session_key and not s.exited
|
||||
s.session_key == session_key
|
||||
and not s.exited
|
||||
and (max_active_age is None or (now - s.started_at) < max_active_age)
|
||||
for s in self._running.values()
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue