"""Tests for the clean shutdown marker that prevents unwanted session auto-resets. When the gateway shuts down gracefully (hermes update, gateway restart, /restart), it writes a .clean_shutdown marker. On the next startup, if the marker exists, suspend_recently_active() is skipped so users don't lose their sessions. After a crash (no marker), suspension still fires as a safety net for stuck sessions. """ import os from datetime import datetime, timedelta from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch import pytest from gateway.config import GatewayConfig, Platform, PlatformConfig, SessionResetPolicy from gateway.session import SessionEntry, SessionSource, SessionStore # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_source(platform=Platform.TELEGRAM, chat_id="123", user_id="u1"): return SessionSource(platform=platform, chat_id=chat_id, user_id=user_id) def _make_store(tmp_path, policy=None): config = GatewayConfig() if policy: config.default_reset_policy = policy return SessionStore(sessions_dir=tmp_path, config=config) # --------------------------------------------------------------------------- # SessionStore.suspend_recently_active # --------------------------------------------------------------------------- class TestSuspendRecentlyActive: """Verify suspend_recently_active only marks recent sessions.""" def test_suspends_recently_active_sessions(self, tmp_path): store = _make_store(tmp_path) source = _make_source() entry = store.get_or_create_session(source) assert not entry.suspended count = store.suspend_recently_active() assert count == 1 # Re-fetch — should be suspended now refreshed = store.get_or_create_session(source) assert refreshed.was_auto_reset def test_does_not_suspend_old_sessions(self, tmp_path): store = _make_store(tmp_path) source = _make_source() entry = store.get_or_create_session(source) # Backdate the session's updated_at beyond the cutoff with store._lock: entry.updated_at = datetime.now() - timedelta(seconds=300) store._save() count = store.suspend_recently_active(max_age_seconds=120) assert count == 0 def test_already_suspended_not_double_counted(self, tmp_path): store = _make_store(tmp_path) source = _make_source() entry = store.get_or_create_session(source) # Suspend once count1 = store.suspend_recently_active() assert count1 == 1 # Create a new session (the old one got reset on next access) entry2 = store.get_or_create_session(source) # Suspend again — the new session is recent but not yet suspended count2 = store.suspend_recently_active() assert count2 == 1 # --------------------------------------------------------------------------- # Clean shutdown marker integration # --------------------------------------------------------------------------- class TestCleanShutdownMarker: """Test that the marker file controls session suspension on startup.""" def test_marker_written_on_graceful_stop(self, tmp_path, monkeypatch): """stop() should write .clean_shutdown marker.""" monkeypatch.setattr("gateway.run._hermes_home", tmp_path) marker = tmp_path / ".clean_shutdown" assert not marker.exists() # Create a minimal runner and call the shutdown logic directly from gateway.run import GatewayRunner runner = object.__new__(GatewayRunner) runner._restart_requested = False runner._restart_detached = False runner._restart_via_service = False runner._restart_task_started = False runner._running = True runner._draining = False runner._stop_task = None runner._running_agents = {} runner._pending_messages = {} runner._pending_approvals = {} runner._background_tasks = set() runner._shutdown_event = MagicMock() runner._restart_drain_timeout = 5 runner._exit_code = None runner._exit_reason = None runner.adapters = {} runner.config = GatewayConfig() # Mock heavy dependencies with patch("gateway.run.GatewayRunner._drain_active_agents", new_callable=AsyncMock, return_value=([], False)), \ patch("gateway.run.GatewayRunner._finalize_shutdown_agents"), \ patch("gateway.run.GatewayRunner._update_runtime_status"), \ patch("gateway.status.remove_pid_file"), \ patch("tools.process_registry.process_registry") as mock_proc_reg, \ patch("tools.terminal_tool.cleanup_all_environments"), \ patch("tools.browser_tool.cleanup_all_browsers"): mock_proc_reg.kill_all = MagicMock() import asyncio asyncio.get_event_loop().run_until_complete(runner.stop()) assert marker.exists(), ".clean_shutdown marker should exist after graceful stop" def test_marker_skips_suspension_on_startup(self, tmp_path, monkeypatch): """If .clean_shutdown exists, suspend_recently_active should NOT be called.""" monkeypatch.setattr("gateway.run._hermes_home", tmp_path) # Create the marker marker = tmp_path / ".clean_shutdown" marker.touch() # Create a store with a recently active session store = _make_store(tmp_path) source = _make_source() entry = store.get_or_create_session(source) assert not entry.suspended # Simulate what start() does: if marker.exists(): marker.unlink() # Should NOT call suspend_recently_active else: store.suspend_recently_active() # Session should NOT be suspended with store._lock: store._ensure_loaded_locked() for e in store._entries.values(): assert not e.suspended, "Session should NOT be suspended after clean shutdown" assert not marker.exists(), "Marker should be cleaned up" def test_no_marker_triggers_suspension(self, tmp_path, monkeypatch): """Without .clean_shutdown marker (crash), suspension should fire.""" monkeypatch.setattr("gateway.run._hermes_home", tmp_path) marker = tmp_path / ".clean_shutdown" assert not marker.exists() # Create a store with a recently active session store = _make_store(tmp_path) source = _make_source() entry = store.get_or_create_session(source) assert not entry.suspended # Simulate what start() does: if marker.exists(): marker.unlink() else: store.suspend_recently_active() # Session SHOULD be suspended (crash recovery) with store._lock: store._ensure_loaded_locked() suspended_count = sum(1 for e in store._entries.values() if e.suspended) assert suspended_count == 1, "Session should be suspended after crash (no marker)" def test_marker_written_on_restart_stop(self, tmp_path, monkeypatch): """stop(restart=True) should also write the marker.""" monkeypatch.setattr("gateway.run._hermes_home", tmp_path) marker = tmp_path / ".clean_shutdown" from gateway.run import GatewayRunner runner = object.__new__(GatewayRunner) runner._restart_requested = False runner._restart_detached = False runner._restart_via_service = False runner._restart_task_started = False runner._running = True runner._draining = False runner._stop_task = None runner._running_agents = {} runner._pending_messages = {} runner._pending_approvals = {} runner._background_tasks = set() runner._shutdown_event = MagicMock() runner._restart_drain_timeout = 5 runner._exit_code = None runner._exit_reason = None runner.adapters = {} runner.config = GatewayConfig() with patch("gateway.run.GatewayRunner._drain_active_agents", new_callable=AsyncMock, return_value=([], False)), \ patch("gateway.run.GatewayRunner._finalize_shutdown_agents"), \ patch("gateway.run.GatewayRunner._update_runtime_status"), \ patch("gateway.status.remove_pid_file"), \ patch("tools.process_registry.process_registry") as mock_proc_reg, \ patch("tools.terminal_tool.cleanup_all_environments"), \ patch("tools.browser_tool.cleanup_all_browsers"): mock_proc_reg.kill_all = MagicMock() import asyncio asyncio.get_event_loop().run_until_complete(runner.stop(restart=True)) assert marker.exists(), ".clean_shutdown marker should exist after restart-stop too"