hermes-agent/tests/gateway/test_clean_shutdown_marker.py
kshitijk4poor 66827f8947 chore: prune unused imports and duplicate import redefinitions
Remove unused imports (F401) and duplicate/shadowed import
redefinitions (F811) across the codebase using ruff's safe
autofixes. No behavioral changes -- imports only.

- ~1400 safe autofixes applied across 644 files (net -1072 lines)
- __init__.py re-exports preserved (excluded from F401 removal so
  public re-export surfaces stay intact)
- Re-exports that are imported or monkeypatched by tests but look
  unused in their defining module are kept with explicit # noqa:
  F401 (gateway/run.py load_dotenv; run_agent re-exports from
  agent.message_sanitization, agent.context_compressor,
  agent.retry_utils, agent.prompt_builder, agent.process_bootstrap,
  agent.codex_responses_adapter)
- Unsafe F841 (unused-variable) fixes deliberately skipped -- those
  can change behavior when the RHS has side effects
- ruff lints remain disabled in pyproject.toml (only PLW1514 is
  selected); this is a one-time cleanup, not a config change

Verification:
- python -m compileall: clean
- pytest --collect-only: all 27161 tests collect (zero import errors)
- core entry points import clean (run_agent, model_tools, cli,
  toolsets, hermes_state, batch_runner, gateway)
- static scan: every name any test imports directly from an edited
  module still resolves
2026-05-28 22:26:25 -07:00

225 lines
8.9 KiB
Python

"""Tests for the clean shutdown marker that prevents unwanted session auto-resets.
When the gateway shuts down gracefully (hermes update, gateway restart, /restart),
it writes a .clean_shutdown marker. On the next startup, if the marker exists,
suspend_recently_active() is skipped so users don't lose their sessions.
After a crash (no marker), suspension still fires as a safety net for stuck sessions.
"""
from datetime import datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch
from gateway.config import GatewayConfig, Platform
from gateway.session import SessionSource, SessionStore
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_source(platform=Platform.TELEGRAM, chat_id="123", user_id="u1"):
    """Build a ``SessionSource`` for tests; defaults describe a Telegram chat."""
    source_fields = {
        "platform": platform,
        "chat_id": chat_id,
        "user_id": user_id,
    }
    return SessionSource(**source_fields)
def _make_store(tmp_path, policy=None):
    """Create a ``SessionStore`` whose sessions directory is *tmp_path*.

    A truthy *policy* is installed as ``default_reset_policy`` on the fresh
    ``GatewayConfig`` before the store is constructed; otherwise the config
    is used unmodified.
    """
    cfg = GatewayConfig()
    if policy:
        cfg.default_reset_policy = policy
    session_store = SessionStore(sessions_dir=tmp_path, config=cfg)
    return session_store
# ---------------------------------------------------------------------------
# SessionStore.suspend_recently_active
# ---------------------------------------------------------------------------
class TestSuspendRecentlyActive:
    """Check that suspend_recently_active flags only recently updated sessions."""

    def test_suspends_recently_active_sessions(self, tmp_path):
        """A freshly created session is flagged resume_pending and keeps its id."""
        session_store = _make_store(tmp_path)
        src = _make_source()
        original = session_store.get_or_create_session(src)
        assert not original.suspended

        flagged = session_store.suspend_recently_active()
        assert flagged == 1

        # Fetching again must return the same session, now marked
        # resume_pending (history preserved rather than wiped).
        reloaded = session_store.get_or_create_session(src)
        assert reloaded.resume_pending
        assert reloaded.session_id == original.session_id

    def test_does_not_suspend_old_sessions(self, tmp_path):
        """A session last updated before the cutoff is left untouched."""
        session_store = _make_store(tmp_path)
        stale = session_store.get_or_create_session(_make_source())

        # Move updated_at 300s into the past, beyond the 120s cutoff below.
        backdated = datetime.now() - timedelta(seconds=300)
        with session_store._lock:
            stale.updated_at = backdated
            session_store._save()

        assert session_store.suspend_recently_active(max_age_seconds=120) == 0

    def test_already_resume_pending_not_double_counted(self, tmp_path):
        """A second pass does not count entries that are already resume_pending."""
        session_store = _make_store(tmp_path)
        src = _make_source()
        first = session_store.get_or_create_session(src)

        # First pass flags the session.
        assert session_store.suspend_recently_active() == 1

        # The lookup still resolves to the very same session (not reset).
        again = session_store.get_or_create_session(src)
        assert again.session_id == first.session_id

        # Second pass finds nothing new to flag.
        assert session_store.suspend_recently_active() == 0
# ---------------------------------------------------------------------------
# Clean shutdown marker integration
# ---------------------------------------------------------------------------
class TestCleanShutdownMarker:
    """Test that the marker file controls session suspension on startup."""

    @staticmethod
    def _make_runner():
        """Return a bare ``GatewayRunner`` with only the state ``stop()`` needs.

        ``object.__new__`` bypasses ``GatewayRunner.__init__``, so every
        attribute the shutdown path is expected to touch is assigned by hand.
        The import is local so it happens after the caller has monkeypatched
        ``gateway.run._hermes_home``.
        """
        from gateway.run import GatewayRunner

        runner = object.__new__(GatewayRunner)
        runner._restart_requested = False
        runner._restart_detached = False
        runner._restart_via_service = False
        runner._restart_task_started = False
        runner._running = True
        runner._draining = False
        runner._stop_task = None
        runner._running_agents = {}
        runner._pending_messages = {}
        runner._pending_approvals = {}
        runner._background_tasks = set()
        runner._shutdown_event = MagicMock()
        runner._restart_drain_timeout = 5
        runner._exit_code = None
        runner._exit_reason = None
        runner.adapters = {}
        runner.config = GatewayConfig()
        return runner

    @staticmethod
    def _stop_runner(runner, **stop_kwargs):
        """Drive ``runner.stop(**stop_kwargs)`` to completion with heavy deps mocked.

        A dedicated event loop is created and closed here instead of calling
        ``asyncio.get_event_loop()``: the implicit-loop lookup is deprecated
        outside a running loop and depends on whatever loop state earlier
        tests left behind. Using a private loop also leaves the thread's
        current-loop setting untouched for other tests.
        """
        import asyncio

        # Mock heavy dependencies
        with patch("gateway.run.GatewayRunner._drain_active_agents", new_callable=AsyncMock, return_value=([], False)), \
                patch("gateway.run.GatewayRunner._finalize_shutdown_agents"), \
                patch("gateway.run.GatewayRunner._update_runtime_status"), \
                patch("gateway.status.remove_pid_file"), \
                patch("tools.process_registry.process_registry") as mock_proc_reg, \
                patch("tools.terminal_tool.cleanup_all_environments"), \
                patch("tools.browser_tool.cleanup_all_browsers"):
            mock_proc_reg.kill_all = MagicMock()
            loop = asyncio.new_event_loop()
            try:
                loop.run_until_complete(runner.stop(**stop_kwargs))
            finally:
                loop.close()

    def test_marker_written_on_graceful_stop(self, tmp_path, monkeypatch):
        """stop() should write .clean_shutdown marker."""
        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
        marker = tmp_path / ".clean_shutdown"
        assert not marker.exists()

        # Create a minimal runner and call the shutdown logic directly
        self._stop_runner(self._make_runner())

        assert marker.exists(), ".clean_shutdown marker should exist after graceful stop"

    def test_marker_skips_suspension_on_startup(self, tmp_path, monkeypatch):
        """If .clean_shutdown exists, suspend_recently_active should NOT be called."""
        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)

        # Create the marker
        marker = tmp_path / ".clean_shutdown"
        marker.touch()

        # Create a store with a recently active session
        store = _make_store(tmp_path)
        source = _make_source()
        entry = store.get_or_create_session(source)
        assert not entry.suspended

        # Simulate what start() does.
        # NOTE(review): this mirrors the marker check rather than invoking
        # start() itself -- confirm it stays in sync with gateway.run.
        if marker.exists():
            marker.unlink()
            # Should NOT call suspend_recently_active
        else:
            store.suspend_recently_active()

        # Session should NOT be suspended
        with store._lock:
            store._ensure_loaded_locked()
            for e in store._entries.values():
                assert not e.suspended, "Session should NOT be suspended after clean shutdown"
                # The other tests in this file observe suspend_recently_active()
                # through resume_pending, so check that flag as well; asserting
                # only `suspended` would not notice an unwanted suspension pass.
                assert not e.resume_pending, "Session should NOT be resume_pending after clean shutdown"

        assert not marker.exists(), "Marker should be cleaned up"

    def test_no_marker_triggers_suspension(self, tmp_path, monkeypatch):
        """Without .clean_shutdown marker (crash), suspension should fire."""
        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
        marker = tmp_path / ".clean_shutdown"
        assert not marker.exists()

        # Create a store with a recently active session
        store = _make_store(tmp_path)
        source = _make_source()
        entry = store.get_or_create_session(source)
        assert not entry.suspended

        # Simulate what start() does.
        # NOTE(review): this mirrors the marker check rather than invoking
        # start() itself -- confirm it stays in sync with gateway.run.
        if marker.exists():
            marker.unlink()
        else:
            store.suspend_recently_active()

        # Session SHOULD be resume_pending (crash recovery preserves history)
        with store._lock:
            store._ensure_loaded_locked()
            resume_count = sum(1 for e in store._entries.values() if e.resume_pending)
        assert resume_count == 1, "Session should be resume_pending after crash (no marker)"

    def test_marker_written_on_restart_stop(self, tmp_path, monkeypatch):
        """stop(restart=True) should also write the marker."""
        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
        marker = tmp_path / ".clean_shutdown"

        self._stop_runner(self._make_runner(), restart=True)

        assert marker.exists(), ".clean_shutdown marker should exist after restart-stop too"