mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: prevent unwanted session auto-reset after graceful gateway restarts (#8299)
When the gateway shuts down gracefully (hermes update, gateway restart, /restart), it now writes a .clean_shutdown marker file. On the next startup, if this marker exists, suspend_recently_active() is skipped and the marker is cleaned up. Previously, suspend_recently_active() fired on EVERY startup — including planned restarts from hermes update or hermes gateway restart. This caused users to lose their conversation history unexpectedly: the session would be marked as suspended, and the next message would trigger an auto-reset with a notification the user never asked for. The original purpose of suspend_recently_active() is crash recovery — preventing stuck sessions that were mid-processing when the gateway died unexpectedly. Graceful shutdowns already drain active agents via _drain_active_agents(), so there is no stuck-session risk. After a crash (no marker written), suspension still fires as before. Fixes the scenario where a user asks the agent to run hermes update, the gateway restarts, and the user's next message gets an unwanted 'Session automatically reset' notification with their history cleared.
This commit is contained in:
parent
56e3ee2440
commit
b6b6b02f0f
2 changed files with 254 additions and 6 deletions
|
|
@ -1518,12 +1518,25 @@ class GatewayRunner:
|
|||
# This prevents stuck sessions from being blindly resumed on restart,
|
||||
# which can create an unrecoverable loop (#7536). Suspended sessions
|
||||
# auto-reset on the next incoming message, giving the user a clean start.
|
||||
try:
|
||||
suspended = self.session_store.suspend_recently_active()
|
||||
if suspended:
|
||||
logger.info("Suspended %d in-flight session(s) from previous run", suspended)
|
||||
except Exception as e:
|
||||
logger.warning("Session suspension on startup failed: %s", e)
|
||||
#
|
||||
# SKIP suspension after a clean (graceful) shutdown — the previous
|
||||
# process already drained active agents, so sessions aren't stuck.
|
||||
# This prevents unwanted auto-resets after `hermes update`,
|
||||
# `hermes gateway restart`, or `/restart`.
|
||||
_clean_marker = _hermes_home / ".clean_shutdown"
|
||||
if _clean_marker.exists():
|
||||
logger.info("Previous gateway exited cleanly — skipping session suspension")
|
||||
try:
|
||||
_clean_marker.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
suspended = self.session_store.suspend_recently_active()
|
||||
if suspended:
|
||||
logger.info("Suspended %d in-flight session(s) from previous run", suspended)
|
||||
except Exception as e:
|
||||
logger.warning("Session suspension on startup failed: %s", e)
|
||||
|
||||
connected_count = 0
|
||||
enabled_platform_count = 0
|
||||
|
|
@ -2049,6 +2062,15 @@ class GatewayRunner:
|
|||
from gateway.status import remove_pid_file
|
||||
remove_pid_file()
|
||||
|
||||
# Write a clean-shutdown marker so the next startup knows this
|
||||
# wasn't a crash. suspend_recently_active() only needs to run
|
||||
# after unexpected exits — graceful shutdowns already drain
|
||||
# active agents, so there's no stuck-session risk.
|
||||
try:
|
||||
(_hermes_home / ".clean_shutdown").touch()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if self._restart_requested and self._restart_via_service:
|
||||
self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE
|
||||
self._exit_reason = self._exit_reason or "Gateway restart requested"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue