mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-11 08:42:11 +00:00
fix(gateway): call agent.close() on session end to prevent zombies
Wire AIAgent.close() into every gateway code path where an agent's session is actually ending: - stop(): close all running agents after interrupt + memory shutdown, then call cleanup_all_environments() and cleanup_all_browsers() as a global catch-all - _session_expiry_watcher(): close agents when sessions expire after the 5-minute idle timeout - _handle_reset_command(): close the old agent before evicting it from cache on /new or /reset Note: _evict_cached_agent() intentionally does NOT call close() because it is also used for non-destructive cache refreshes (model switch, branch, fallback) where tool resources should persist. Ref: #7131
This commit is contained in:
parent
5b42aecfa7
commit
fbe28352e4
1 changed files with 42 additions and 2 deletions
|
|
@ -1356,6 +1356,12 @@ class GatewayRunner:
|
|||
cached_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
# Close tool resources to prevent zombie processes
|
||||
try:
|
||||
if hasattr(cached_agent, 'close'):
|
||||
cached_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
# Mark as flushed and persist to disk so the flag
|
||||
# survives gateway restarts.
|
||||
with self.session_store._lock:
|
||||
|
|
@ -1536,6 +1542,14 @@ class GatewayRunner:
|
|||
agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
# Close tool resources (terminal sandboxes, browser daemons,
|
||||
# background processes, httpx clients) to prevent zombie
|
||||
# process accumulation.
|
||||
try:
|
||||
if hasattr(agent, 'close'):
|
||||
agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for platform, adapter in list(self.adapters.items()):
|
||||
try:
|
||||
|
|
@ -1558,7 +1572,20 @@ class GatewayRunner:
|
|||
self._pending_messages.clear()
|
||||
self._pending_approvals.clear()
|
||||
self._shutdown_event.set()
|
||||
|
||||
|
||||
# Global cleanup: kill any remaining tool subprocesses not tied
|
||||
# to a specific agent (catch-all for zombie prevention).
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
from gateway.status import remove_pid_file, write_runtime_status
|
||||
remove_pid_file()
|
||||
try:
|
||||
|
|
@ -3335,8 +3362,21 @@ class GatewayRunner:
|
|||
_flush_task.add_done_callback(self._background_tasks.discard)
|
||||
except Exception as e:
|
||||
logger.debug("Gateway memory flush on reset failed: %s", e)
|
||||
# Close tool resources on the old agent (terminal sandboxes, browser
|
||||
# daemons, background processes) before evicting from cache.
|
||||
_lock = getattr(self, "_agent_cache_lock", None)
|
||||
if _lock:
|
||||
with _lock:
|
||||
_cached = self._agent_cache.get(session_key)
|
||||
_old_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None
|
||||
if _old_agent is not None:
|
||||
try:
|
||||
if hasattr(_old_agent, "close"):
|
||||
_old_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._evict_cached_agent(session_key)
|
||||
|
||||
|
||||
try:
|
||||
from tools.env_passthrough import clear_env_passthrough
|
||||
clear_env_passthrough()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue